Exemplo n.º 1
0
    def build_model(self):
        """Build the actor network mapping states -> actions.

        Side effects: sets ``self.model`` (Keras Model) and ``self.train_fn``,
        a backend function that applies one optimizer step given
        (states, action_gradients, learning_phase).
        """
        l2_regularization_kernel = 1e-5

        # Input layer (renamed from `input` so the builtin is not shadowed).
        input_states = layers.Input(shape=(self.state_size,), name='input_states')

        # Hidden layers: Dense -> BatchNorm -> LeakyReLU blocks.
        net = layers.Dense(units=300, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(input_states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(units=400, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(units=200, kernel_regularizer=regularizers.l2(l2_regularization_kernel))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Output layer: tanh bounds actions to [-1, 1]; the small uniform
        # initializer keeps initial outputs near zero.
        output = layers.Dense(units=self.action_size, activation='tanh',
                              kernel_regularizer=regularizers.l2(l2_regularization_kernel),
                              kernel_initializer=initializers.RandomUniform(minval=-3e-3, maxval=3e-3),
                              name='output_actions')(net)

        # Keras model
        self.model = models.Model(inputs=input_states, outputs=output)

        # Deterministic-policy-gradient loss: ascend the critic-supplied
        # dQ/da, hence the negation under the mean.
        action_gradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-action_gradients * output)
        optimizer = optimizers.Adam(lr=1e-4)

        update_operation = optimizer.get_updates(params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(inputs=[self.model.input, action_gradients, K.learning_phase()],
                                   outputs=[], updates=update_operation)
Exemplo n.º 2
0
    def build_model(self):
        """Build the critic network mapping (states, actions) -> Q-values.

        Side effects: sets ``self.model`` (compiled with MSE loss) and
        ``self.get_action_gradients`` for use by the actor.
        """
        l2_kernel_regularization = 1e-5

        def dense_bn_lrelu(units, tensor):
            # Dense -> BatchNorm -> LeakyReLU, each Dense with its own
            # fresh L2 kernel regularizer.
            out = layers.Dense(
                units=units,
                kernel_regularizer=regularizers.l2(l2_kernel_regularization))(tensor)
            out = layers.BatchNormalization()(out)
            return layers.LeakyReLU(1e-2)(out)

        # Define input layers
        input_states = layers.Input(shape=(self.state_size, ),
                                    name='input_states')
        input_actions = layers.Input(shape=(self.action_size, ),
                                     name='input_actions')

        # State pathway: 32 -> 64 units.
        states_path = dense_bn_lrelu(32, input_states)
        states_path = dense_bn_lrelu(64, states_path)

        # Action pathway: 64 units.
        actions_path = dense_bn_lrelu(64, input_actions)

        # Merge both pathways, then one more hidden block.
        merged = layers.add([states_path, actions_path])
        merged = dense_bn_lrelu(32, merged)

        # Output layer: single linear Q-value.
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(l2_kernel_regularization),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            name='output_Q_values')(merged)

        # Wrap and compile the Keras model.
        self.model = models.Model(inputs=[input_states, input_actions],
                                  outputs=Q_values)
        optimizer = optimizers.Adam(lr=1e-2)
        self.model.compile(optimizer=optimizer, loss='mse')

        # Expose dQ/da for the actor's policy-gradient step.
        action_gradients = K.gradients(Q_values, input_actions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Exemplo n.º 3
0
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions.

        Side effects: sets ``self.model`` and ``self.train_fn`` (one
        optimizer step given states and critic-supplied action gradients).
        """
        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')

        # Hidden layers: Dense -> BatchNorm -> LeakyReLU.
        net = layers.Dense(units=400,
                           kernel_regularizer=regularizers.l2(1e-6))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)
        net = layers.Dense(units=300,
                           kernel_regularizer=regularizers.l2(1e-6))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Final output layer: sigmoid keeps raw actions in [0, 1]; the
        # small uniform init keeps initial outputs near the midpoint.
        raw_actions = layers.Dense(
            units=self.action_size,
            activation='sigmoid',
            name='raw_actions',
            kernel_initializer=initializers.RandomUniform(minval=-0.003,
                                                          maxval=0.003))(net)

        # Scale [0, 1] output for each action dimension to proper range
        actions = layers.Lambda(lambda x:
                                (x * self.action_range) + self.action_low,
                                name='actions')(raw_actions)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Deterministic-policy-gradient loss: ascend the critic-supplied
        # dQ/da (hence the negation under the mean).
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-6)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Exemplo n.º 4
0
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values."""
        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        def hidden_block(units, tensor):
            # Dense -> BatchNorm -> LeakyReLU with L2 kernel regularization.
            h = layers.Dense(
                units=units, kernel_regularizer=regularizers.l2(0.00001))(tensor)
            h = layers.BatchNormalization()(h)
            return layers.LeakyReLU(0.01)(h)

        # State pathway: 300 -> 400 units.
        net_states = hidden_block(300, states)
        net_states = hidden_block(400, net_states)

        # Action pathway: 400 units.
        net_actions = hidden_block(400, actions)

        # Combine state and action pathways.
        net = layers.Add()([net_states, net_actions])
        net = layers.Activation('relu')(net)

        # One more fully connected block on the combined features.
        net = layers.Dense(units=256,
                           kernel_regularizer=regularizers.l2(0.00001))(net)
        net = layers.BatchNormalization()(net)
        net = layers.Activation(activation='relu')(net)

        # Final linear layer producing the scalar Q-value.
        Q_values = layers.Dense(units=1,
                                activation=None,
                                name='q_values',
                                kernel_initializer=initializers.RandomUniform(
                                    minval=-3e-4, maxval=3e-4))(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Compile for training with the built-in MSE loss.
        optimizer = optimizers.Adam()
        self.model.compile(optimizer=optimizer, loss='mse')

        # Expose dQ/da (derivative of Q w.r.t. actions) for the actor.
        action_gradients = K.gradients(Q_values, actions)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Exemplo n.º 5
0
 def add_resnet_unit(path, **params):
     """Append one residual unit (Conv-BN-ReLU-Conv-BN, skip add, ReLU) to `path`.

     Returns the new output tensor. Debug `print` statements (Python-2-only
     syntax) removed; the duplicate of this helper elsewhere in this file
     has none either.
     """
     block_input = path
     # First conv of the residual branch.
     path = L.Convolution2D(
         filters=params["filters_per_layer"],
         kernel_size=params["filter_width"],
         activation='linear',
         padding='same',
         kernel_regularizer=R.l2(.0001),
         bias_regularizer=R.l2(.0001))(path)
     path = L.BatchNormalization(
             beta_regularizer=R.l2(.0001),
             gamma_regularizer=R.l2(.0001))(path)
     path = L.Activation('relu')(path)
     # Second conv of the residual branch.
     path = L.Convolution2D(
         filters=params["filters_per_layer"],
         kernel_size=params["filter_width"],
         activation='linear',
         padding='same',
         kernel_regularizer=R.l2(.0001),
         bias_regularizer=R.l2(.0001))(path)
     path = L.BatchNormalization(
             beta_regularizer=R.l2(.0001),
             gamma_regularizer=R.l2(.0001))(path)
     # Skip connection: add the block input, then the final activation.
     path = L.Add()([block_input, path])
     path = L.Activation('relu')(path)
     return path
Exemplo n.º 6
0
def res_block(inputs, size):
    """Residual block: two Dense->BN->LeakyReLU layers plus an additive skip."""
    kernel_l2_reg = 1e-3

    def regularized_dense(tensor):
        # Linear Dense layer with L2 regularization and a small uniform init.
        return layers.Dense(size,
                            activation=None,
                            kernel_regularizer=regularizers.l2(kernel_l2_reg),
                            kernel_initializer=initializers.RandomUniform(
                                minval=-5e-3, maxval=5e-3))(tensor)

    hidden = regularized_dense(inputs)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.LeakyReLU(1e-2)(hidden)

    hidden = regularized_dense(hidden)
    hidden = layers.BatchNormalization()(hidden)
    hidden = layers.LeakyReLU(1e-2)(hidden)

    # Residual merge with the block input.
    return layers.add([inputs, hidden])
def residual_layer(input_tensor, nb_in_filters=64, nb_bottleneck_filters=16, filter_sz=3, stage=0, reg=0.0):
    """Pre-activation bottleneck residual layer: (BN-ReLU-)Conv x3 + identity add.

    Channel flow: nb_in_filters -> nb_bottleneck_filters (1x1) ->
    nb_bottleneck_filters (FxF) -> nb_in_filters (1x1), then summed with
    the input tensor. Layer names are suffixed with the stage number.
    """
    stage_tag = str(stage)
    bn_name = 'bn' + stage_tag
    conv_name = 'conv' + stage_tag
    relu_name = 'relu' + stage_tag
    merge_name = 'add' + stage_tag

    # Bottleneck 1x1 conv; stages > 1 get a pre-activation first (stage 1's
    # first activation sits just after conv1, outside this layer).
    if stage > 1:
        t = layers.BatchNormalization(axis=-1, name=bn_name + 'a')(input_tensor)
        t = layers.Activation('relu', name=relu_name + 'a')(t)
    else:
        t = input_tensor
    t = layers.Conv2D(nb_bottleneck_filters, (1, 1),
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      use_bias=False,
                      name=conv_name + 'a')(t)

    # FxF conv over the bottleneck features.
    t = layers.BatchNormalization(axis=-1, name=bn_name + 'b')(t)
    t = layers.Activation('relu', name=relu_name + 'b')(t)
    t = layers.Conv2D(nb_bottleneck_filters, (filter_sz, filter_sz),
                      padding='same',
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      use_bias=False,
                      name=conv_name + 'b')(t)

    # 1x1 conv back up to the input channel count (this one keeps a bias).
    t = layers.BatchNormalization(axis=-1, name=bn_name + 'c')(t)
    t = layers.Activation('relu', name=relu_name + 'c')(t)
    t = layers.Conv2D(nb_in_filters, (1, 1),
                      kernel_initializer='glorot_normal',
                      kernel_regularizer=regularizers.l2(reg),
                      name=conv_name + 'c')(t)

    # Identity shortcut.
    return layers.add([t, input_tensor], name=merge_name)
Exemplo n.º 8
0
    def __init__(self, l2_reg):
        """Build six strided Conv3D encoder layers.

        Args:
            l2_reg: L2 regularization factor applied to every conv kernel.
        """
        super().__init__()

        # The regularizer and initializer objects are shared across layers,
        # matching the original construction.
        reg = _regs.l2(l2_reg)
        initializer = tf.contrib.layers.xavier_initializer()

        def make_conv(filters, name):
            # All six layers are identical apart from filter count and name:
            # 3x3x3 kernel, stride 2, 'same' padding, ELU activation.
            return _layers.Conv3D(filters,
                                  3,
                                  strides=2,
                                  padding="same",
                                  name=name,
                                  activation=tf.nn.elu,
                                  kernel_regularizer=reg,
                                  kernel_initializer=initializer)

        self._conv1 = make_conv(64, "conv1")
        self._conv2 = make_conv(64, "conv2")
        self._conv3 = make_conv(128, "conv3")
        self._conv4 = make_conv(256, "conv4")
        self._conv5 = make_conv(512, "conv5")
        self._conv6 = make_conv(512, "conv6")
Exemplo n.º 9
0
    def build_model(self):
        """Actor network: states -> tanh-bounded actions, plus a train function.

        Side effects: sets ``self.model`` and ``self.train_fn``.
        """
        states = layers.Input(shape=(self.state_size,), name='inputStates')

        def hidden_block(units, tensor):
            # Dense -> BatchNorm -> LeakyReLU -> Dropout
            h = layers.Dense(units=units, activation='linear')(tensor)
            h = layers.BatchNormalization()(h)
            h = layers.LeakyReLU(0.01)(h)
            return layers.Dropout(0.3)(h)

        # Hidden layers: 128 -> 256 -> 512 -> 128 units.
        net = hidden_block(128, states)
        net = hidden_block(256, net)
        net = hidden_block(512, net)
        net = hidden_block(128, net)

        # Output layer: tanh bounds each action to [-1, 1].
        output = layers.Dense(
            units=self.action_size,
            activation='tanh',
            kernel_regularizer=regularizers.l2(0.01),
            name='outputActions')(net)

        # Keras model
        self.model = models.Model(inputs=states, outputs=output)

        # Policy-gradient style update driven by the critic's dQ/da.
        actionGradients = layers.Input(shape=(self.action_size,))
        loss = K.mean(-actionGradients * output)
        optimizer = optimizers.Adam()
        update_operation = optimizer.get_updates(
            params=self.model.trainable_weights, loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, actionGradients, K.learning_phase()],
            outputs=[],
            updates=update_operation)
def generate_model():
    """Build a VGG16-based classifier (trainable base + dense head), wrapped
    for multi-GPU training. Prints the model summary before returning."""
    conv_base = tf.contrib.keras.applications.VGG16(include_top=False,
                                                    weights='imagenet',
                                                    input_shape=(IMG_WIDTH,
                                                                 IMG_HEIGHT,
                                                                 3))
    # Fine-tune the whole convolutional base.
    conv_base.trainable = True

    model = models.Sequential()
    for layer in (
            conv_base,
            layers.Flatten(),
            layers.Dense(HIDDEN_SIZE,
                         name='dense',
                         kernel_regularizer=regularizers.l2(L2_LAMBDA)),
            layers.Dropout(rate=0.3, name='dropout'),
            layers.Dense(NUM_CLASSES, activation='softmax',
                         name='dense_output'),
    ):
        model.add(layer)

    # Replicate across GPUs for data-parallel training.
    model = multi_gpu_model(model, gpus=NUM_GPUS)
    print(model.summary())
    return model
Exemplo n.º 11
0
        def add_resnet_unit(path, **params):
            """Append one residual unit to `path` and return the new tensor.

            Structure: Conv-BN-ReLU-Conv-BN, skip add with the block input,
            then a final ReLU.
            """
            skip = path
            out = path

            # First conv of the residual branch: Conv -> BN -> ReLU.
            out = L.Convolution2D(
                filters=params["filters_per_layer"],
                kernel_size=params["filter_width"],
                activation='linear',
                padding='same',
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(out)
            out = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(out)
            out = L.Activation('relu')(out)

            # Second conv of the residual branch: Conv -> BN.
            out = L.Convolution2D(
                filters=params["filters_per_layer"],
                kernel_size=params["filter_width"],
                activation='linear',
                padding='same',
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(out)
            out = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(out)

            # Merge the block input with the branch, then activate.
            out = L.Add()([skip, out])
            return L.Activation('relu')(out)
Exemplo n.º 12
0
    def shortcut(self, input, residual):
        """Adds a shortcut between input and residual block and merges them with "sum".

        If the residual's spatial size or channel count differs from the
        input's, the input is first projected with a strided 1x1 convolution
        so the two tensors can be summed.
        """
        # Expand channels of shortcut to match residual.
        # Stride appropriately to match residual (width, height).
        input_shape = K.int_shape(input)

        # `residual` may carry a framework shape object exposing .as_list()
        # (e.g. a TF TensorShape) or a plain tuple from np.shape; the bare
        # `except:` is narrowed to the only failure this fallback handles.
        try:
            residual_shape = np.shape(residual).as_list()
        except AttributeError:
            residual_shape = np.shape(residual)

        # Should be int if network architecture is correctly configured.
        stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS]))
        stride_height = int(round(input_shape[COL_AXIS] / residual_shape[COL_AXIS]))
        equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS]

        shortcut = input
        # 1 X 1 conv if shape is different. Else identity.
        if stride_width > 1 or stride_height > 1 or not equal_channels:
            shortcut = layers.Conv2D(filters=residual_shape[CHANNEL_AXIS],
                              kernel_size=(1, 1),
                              strides=(stride_width, stride_height),
                              padding="valid",
                              kernel_initializer="he_normal",
                              kernel_regularizer=regularizers.l2(0.0001))(input)

        return layers.add([shortcut, residual])
Exemplo n.º 13
0
    weights='imagenet',
    input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)  # 3 per i canali RGB
)
conv_base.summary()
# Print info about the convolutional base.

# NETWORK
# Final classifier layers on top of the VGG base.
model = models.Sequential()
model.add(conv_base)
model.add(layers.Flatten())

model.add(
    layers.Dense(512,
                 name='dense_1',
                 kernel_regularizer=regularizers.l2(L2_LAMBDA)))
model.add(layers.Activation(activation='relu', name='activation_1'))

model.add(layers.Dense(NUM_CLASSES, activation='softmax', name='dense_output'))
model.summary()

# Freeze the conv base so only the new head's weights train.
conv_base.trainable = False
model.summary()


def load_batch(file_list):
    img_array = []
    idx_array = []
    label_array = []

    for file_ in file_list:
Exemplo n.º 14
0
def create_model(embeddings, config=get_config(), sentence_length=100):
    """Build a two-input sentence-pair classifier with attention.

    Args:
        embeddings: pretrained embedding matrix, shape (vocab, embedding_size).
        config: hyperparameter dict (from get_config()).
        sentence_length: padded token length of each input sentence.

    Returns:
        (model, config): model maps (input_1, input_2) token ids to a
        sigmoid score.

    Fix: removed the dead `merged_tensor = merge([tensor_a, tensor_b], ...)`
    line — `tensor_a`/`tensor_b` were never defined (guaranteed NameError),
    the `merge` function is gone from Keras 2, and the result was unused.
    """
    config['sentence_length'] = sentence_length

    # Sentence-level attention sub-model: softmax weights over positions,
    # applied multiplicatively, then summed over the sequence axis.
    # NOTE(review): built but not used below — kept for parity with the
    # related create_model variants in this file; confirm before deleting.
    attention_input = Input(shape=(
        config['sentence_length'] - 2,
        config['embedding_size'],
    ),
                            dtype='float32')

    x = Permute((2, 1))(attention_input)
    x = Reshape((config['embedding_size'], config['sentence_length'] - 2))(x)
    x = Dense(config['sentence_length'] - 2, activation='softmax',
              bias=True)(x)

    x = Lambda(lambda x: K.mean(x, axis=1),
               name='attention_vector_sentence')(x)
    x = RepeatVector(config['embedding_size'])(x)

    attention_probabilities = Permute((2, 1))(x)

    x = multiply([attention_input, attention_probabilities],
                 name='attention_mul')
    x = Lambda(lambda x: K.sum(x, axis=1))(x)

    sentence_attention = Model(attention_input, x, name='sentence_attention')

    # Frozen pretrained embedding layer, shared by both inputs.
    embedding_layer = Embedding(
        embeddings.shape[0],
        embeddings.shape[1],
        input_length=config['sentence_length'],
        trainable=False,
        weights=[embeddings],
    )

    input1 = Input(shape=(config['sentence_length'], ),
                   dtype='int32',
                   name='input_1')
    x1 = embedding_layer(input1)
    x1 = SpatialDropout1D(config['embedding_dropout'])(x1)
    x1 = Attention()(x1)

    input2 = Input(shape=(config['sentence_length'], ),
                   dtype='int32',
                   name='input_2')
    x2 = embedding_layer(input2)
    x2 = SpatialDropout1D(config['embedding_dropout'])(x2)
    x2 = Attention()(x2)

    x = concatenate([x1, x2])

    # Optional dense layer before the classifier head.
    if config['dense_layer_size']:
        x = Dense(config['dense_layer_size'],
                  activation='relu',
                  kernel_regularizer=regularizers.l2(
                      config['l2_reg_lambda']))(x)
        x = Dropout(config['dropout_prob'])(x)

    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=(input1, input2), outputs=output)

    return model, config
Exemplo n.º 15
0
def create_model(embeddings, config=get_config(), sentence_length=100):
    """Build a sentence classifier: attention + dilated multi-window CNNs.

    Args:
        embeddings: pretrained embedding matrix (vocab_size, embedding_size).
        config: hyperparameter dict from get_config().
        sentence_length: padded token length of the input.

    Returns:
        (model, config): model maps token ids to a sigmoid probability.

    Raises:
        Exception: if a convolution window is wider than the (dilated)
            sentence permits.

    Fix: pool-size arithmetic now uses integer division (//) — the original
    `/` yields floats under Python 3, and MaxPooling1D requires an int
    pool_size. Under Python 2 int `/` was already floor division, so
    behavior is unchanged there.
    """
    config['sentence_length'] = sentence_length

    # Sentence attention sub-model: per-position softmax weights applied
    # multiplicatively to the embedded sequence.
    attention_input = Input(shape=(config['sentence_length'], config['embedding_size'],), dtype='float32')

    x = Permute((2, 1))(attention_input)
    x = Reshape((config['embedding_size'], config['sentence_length']))(x)
    x = Dense(config['sentence_length'], activation='softmax', bias=True)(x)

    x = Lambda(lambda x: K.mean(x, axis=1), name='attention_vector_sentence')(x)
    x = RepeatVector(config['embedding_size'])(x)

    attention_probabilities = Permute((2, 1))(x)

    x = multiply([attention_input, attention_probabilities], name='attention_mul')

    sentence_attention = Model(attention_input, x, name='sentence_attention')

    # Frozen pretrained embeddings.
    embedding_layer = Embedding(
            embeddings.shape[0],
            embeddings.shape[1],
            input_length=config['sentence_length'],
            trainable=False,
            weights=[embeddings],
        )

    input = Input(shape=(config['sentence_length'],), dtype='int32')
    x = embedding_layer(input)
    x = SpatialDropout1D(config['dropout_embedding'])(x)

    x_att = sentence_attention(x)
    x = BatchNormalization()(x)

    x_att_sum = Lambda(lambda x: K.sum(x, axis=1))(x_att)

    conv_results = []

    # One conv branch per (dilation, window, pool-size) combination.
    for k, d in enumerate(config['cnn_dilation_rates'].split(',')):
        dilation = int(d)
        # Effective sequence length once dilation is accounted for.
        effective_length = config['sentence_length'] // dilation

        for i, w in enumerate(config['cnn_windows'][k].split(',')):
            strides = int(config['cnn_filter_strides'].split(',')[i])
            window_size = int(w)

            if window_size > effective_length:
                raise Exception("Window too big .. exiting...")

            for j, p in enumerate(config['cnn_pool_sizes'][k].split(',')):
                # 'all' pools over the whole conv output; otherwise the
                # requested size is clamped to the conv output length.
                max_pool = effective_length - window_size + 1
                pool_size = max_pool if 'all' == p else min(int(p), max_pool)

                conv = Conv1D(
                    config['cnn_num_filters'],
                    window_size,
                    activation='relu',
                    use_bias=True,
                    kernel_regularizer=regularizers.l2(config['l2_reg_lambda']),
                    dilation_rate=dilation,
                    strides=strides
                )

                conv_results.append(conv(x_att))
                conv_results[-1] = MaxPooling1D(pool_size, padding='valid')(conv_results[-1])
                conv_results[-1] = Flatten()(conv_results[-1])
                conv_results[-1] = ActivityRegularization(l2=config['l2_reg_lambda'])(conv_results[-1])

    # The summed attention output joins the pooled conv features.
    conv_results.append(x_att_sum)
    if len(conv_results) > 1:
        x = concatenate(conv_results, axis=1)
    else:
        x = conv_results[0]

    # Optional dense layer before the classifier head.
    if config['dense_layer_size']:
        x = Dense(config['dense_layer_size'], activation='relu',
                  kernel_regularizer=regularizers.l2(config['l2_reg_lambda']))(x)
        x = Dropout(config['dropout_prob'])(x)

    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input, outputs=output)

    return model, config
Exemplo n.º 16
0
    def create_network(**kwargs):
        """Build an AlphaGo-Zero-style dual-head network for a 9x9 board.

        Input: (17, 9, 9) feature planes. Returns a Model with two outputs:
        a softmax policy over 9*9+1 moves and a tanh scalar value.

        Fixes: removed Python-2-only debug `print` statements (whose
        "value"/"policy" banner labels were also swapped) and the no-op
        `input_shape=()` kwargs — in the functional API a layer's shape
        comes from the tensor it is called on.
        """
        model_input = L.Input(shape=(17, 9, 9))

        # Shared convolutional trunk: two Conv-BN-ReLU blocks.
        convolution_path = L.Convolution2D(
            filters=64,
            kernel_size=3,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(model_input)
        convolution_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(convolution_path)
        convolution_path = L.Activation('relu')(convolution_path)

        convolution_path = L.Convolution2D(
            filters=128,
            kernel_size=3,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        convolution_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(convolution_path)
        convolution_path = L.Activation('relu')(convolution_path)

        # Policy head: 1x1 conv -> BN -> ReLU -> dense softmax over moves.
        policy_path = L.Convolution2D(
            filters=2,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        policy_path = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(policy_path)
        policy_path = L.Activation('relu')(policy_path)
        policy_path = L.Flatten()(policy_path)
        # 9*9 board positions plus one "pass" move.
        policy_path = L.Dense(
                (9*9)+1,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(policy_path)
        policy_output = L.Activation('softmax')(policy_path)

        # Value head: 1x1 conv -> BN -> ReLU -> dense -> tanh scalar.
        value_path = L.Convolution2D(
            filters=1,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        value_path = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(value_path)
        value_path = L.Activation('relu')(value_path)
        value_path = L.Flatten()(value_path)
        value_path = L.Dense(
                256,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(value_path)
        value_path = L.Activation('relu')(value_path)
        value_path = L.Dense(
                1,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(value_path)
        value_output = L.Activation('tanh')(value_path)

        return M.Model(inputs=[model_input], outputs=[policy_output, value_output])
Exemplo n.º 17
0
def create_model(embeddings, config=None, sentence_length=100):
    """Build a CNN sentence classifier with an optional attention head.

    Parameters
    ----------
    embeddings : 2-D array of pretrained word vectors (vocab x dim);
        used to initialize a frozen Embedding layer.
    config : dict or None
        Hyper-parameter dict; defaults to ``get_config()``.  The default is
        resolved lazily: the original signature used ``config=get_config()``,
        which is evaluated once at import time and then mutated below, so
        every call shared (and overwrote) one dict.
    sentence_length : int
        Input sequence length, stored into ``config['sentence_length']``.

    Returns
    -------
    (model, config) : the uncompiled Keras model and the (updated) config.
    """
    if config is None:
        config = get_config()
    config['sentence_length'] = sentence_length

    # --- sentence-level attention sub-model --------------------------------
    # Learns a softmax weighting over timesteps, then sums the weighted
    # embeddings into a single vector.  (Built here but only wired in via
    # the Attention() layer below; kept for behavioral parity.)
    attention_input = Input(shape=(config['sentence_length'] - 2, config['embedding_size'],), dtype='float32')

    x = Permute((2, 1))(attention_input)
    x = Reshape((config['embedding_size'], config['sentence_length'] - 2))(x)
    # Keras 2 spells the bias switch ``use_bias``; the legacy ``bias=``
    # kwarg is rejected by current versions.
    x = Dense(config['sentence_length'] - 2, activation='softmax', use_bias=True)(x)

    x = Lambda(lambda x: K.mean(x, axis=1), name='attention_vector_sentence')(x)
    x = RepeatVector(config['embedding_size'])(x)

    attention_probabilities = Permute((2, 1))(x)

    # Weight each timestep's embedding by its attention probability, then
    # collapse the time axis.
    x = multiply([attention_input, attention_probabilities], name='attention_mul')
    x = Lambda(lambda x: K.sum(x, axis=1))(x)

    sentence_attention = Model(attention_input, x, name='sentence_attention')

    # --- main classifier ----------------------------------------------------
    embedding_layer = Embedding(
            embeddings.shape[0],
            embeddings.shape[1],
            input_length=config['sentence_length'],
            trainable=False,  # keep the pretrained vectors frozen
            weights=[embeddings],
        )

    input = Input(shape=(config['sentence_length'],), dtype='int32')
    x = embedding_layer(input)
    x = SpatialDropout1D(config['dropout_embedding'])(x)

    # Single width-3 convolution over the embedded sequence.
    x = Conv1D(
        config['cnn_num_filters'],
        3,
        use_bias=True,
    )(x)

    if config['attention']:
        x = Attention()(x)

    if config['dense_layer_size']:
        x = Dense(config['dense_layer_size'], activation='relu',
                  kernel_regularizer=regularizers.l2(config['l2_reg_lambda']))(x)
        x = Dropout(config['dropout_prob'])(x)

    # Binary classification head.
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input, outputs=output)

    return model, config
Exemplo n.º 18
0
    def build_model(self):
        """Build an actor (policy) network that maps states -> actions.

        Side effects:
        - ``self.model``: Keras Model from state vectors to tanh-squashed
          action vectors.
        - ``self.train_fn``: backend function applying one Adam update from
          the DDPG policy-gradient loss ``mean(-dQ/da * a)``, where the
          action-value gradients are fed in by the critic at train time.

        (The docstring originally appeared after the first statement, so it
        never became the method's ``__doc__``; it is now first.)
        """
        kernel_l2_reg = 1e-5  # L2 penalty on every Dense kernel

        # Define input layer (states)
        states = layers.Input(shape=(self.state_size, ), name='states')

        # Hidden layers: Dense -> BatchNorm -> LeakyReLU, widths 300/400/200.
        net = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=400, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Output layer: tanh keeps each action dimension in [-1, 1]; the
        # small uniform init keeps early policy outputs near zero.
        actions = layers.Dense(
            units=self.action_size,
            activation='tanh',
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-3e-3,
                                                          maxval=3e-3),
            name='actions')(net)

        # Create Keras model
        self.model = models.Model(inputs=states, outputs=actions)

        # Define loss function using action value (Q value) gradients
        action_gradients = layers.Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        # Define optimizer and training function
        optimizer = optimizers.Adam(lr=1e-4)

        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Exemplo n.º 19
0
    def create_network(**kwargs):
        """construct a convolutional neural network with Residual blocks.
        Arguments are the same as with the default CNNPolicy network, except the default
        number of layers is 20 plus a new n_skip parameter

        Keword Arguments:
        - input_dim:             depth of features to be processed by first layer (default 17)
        - board:                 width of the go board to be processed (default 19)
        - filters_per_layer:     number of filters used on every layer (default 256)
        - layers:                number of residual blocks (default 19)
        - filter_width:          width of filter
                                 Must be odd.
        """
        defaults = {
            "input_dim": 17,
            "board": 9,
            "filters_per_layer": 64,
            "layers": 9,
            "filter_width": 3
        }

        # copy defaults, but override with anything in kwargs
        params = defaults
        params.update(kwargs)

        # create the network using Keras' functional API,
        model_input = L.Input(shape=(params["input_dim"], params["board"], params["board"]))
        print model_input
        # create first layer
        convolution_path = L.Convolution2D(
            input_shape=(),
            filters=params["filters_per_layer"],
            kernel_size=params["filter_width"],
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(model_input)
        print convolution_path
        convolution_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(convolution_path)
        print convolution_path
        convolution_path = L.Activation('relu')(convolution_path)
        def add_resnet_unit(path, **params):
            block_input = path
            # add Conv2D
            path = L.Convolution2D(
                filters=params["filters_per_layer"],
                kernel_size=params["filter_width"],
                activation='linear',
                padding='same',
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(path)
            print path
            path = L.BatchNormalization(
                    beta_regularizer=R.l2(.0001),
                    gamma_regularizer=R.l2(.0001))(path)
            print path
            path = L.Activation('relu')(path)
            print path
            path = L.Convolution2D(
                filters=params["filters_per_layer"],
                kernel_size=params["filter_width"],
                activation='linear',
                padding='same',
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(path)
            print path
            path = L.BatchNormalization(
                    beta_regularizer=R.l2(.0001),
                    gamma_regularizer=R.l2(.0001))(path)
            print path
            path = L.Add()([block_input, path])
            print path
            path = L.Activation('relu')(path)
            print path
            return path

        # create all other layers
        for _ in range(params['layers']):
            convolution_path = add_resnet_unit(convolution_path, **params)

        print '------------- policy -------------------'            
        # policy head
        policy_path = L.Convolution2D(
            input_shape=(),
            filters=2,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        print policy_path
        policy_path = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(policy_path)
        policy_path = L.Activation('relu')(policy_path)
        print policy_path
        policy_path = L.Flatten()(policy_path)
        print policy_path
        policy_path = L.Dense(
                params["board"]*params["board"]+1,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(policy_path)
        policy_output = L.Activation('softmax')(policy_path)
        print 'policy_output', policy_output

        print '-------------value -------------------'
        
        # value head
        value_path = L.Convolution2D(
            input_shape=(),
            filters=1,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        print value_path
        value_path = L.BatchNormalization(
                beta_regularizer=R.l2(.0001),
                gamma_regularizer=R.l2(.0001))(value_path)
        value_path = L.Activation('relu')(value_path)
        print value_path
        value_path = L.Flatten()(value_path)
        print value_path
        value_path = L.Dense(
                256,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(value_path)
        print value_path
        value_path = L.Activation('relu')(value_path)
        print value_path
        value_path = L.Dense(
                1,
                kernel_regularizer=R.l2(.0001),
                bias_regularizer=R.l2(.0001))(value_path)
        print value_path
        value_output = L.Activation('tanh')(value_path)
        print value_path

        return M.Model(inputs=[model_input], outputs=[policy_output, value_output])
Exemplo n.º 20
0
    def build_model(self):
        """Build a critic (value) network that maps (state, action) pairs -> Q-values.

        Side effects:
        - ``self.model``: compiled Keras model (Adam lr=1e-2, MSE loss) from
          [states, actions] to a scalar Q-value.
        - ``self.get_action_gradients``: backend function returning dQ/da,
          consumed by the actor for its policy-gradient update.

        (The docstring originally appeared mid-body, so it never became the
        method's ``__doc__``; it is now first.)
        """
        kernel_l2_reg = 1e-5  # L2 penalty on every Dense kernel

        # Define input layers
        states = layers.Input(shape=(self.state_size, ), name='states')
        actions = layers.Input(shape=(self.action_size, ), name='actions')

        # State pathway: 300 -> 400 units, each Dense -> BatchNorm -> LeakyReLU
        net_states = layers.Dense(
            units=300,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        net_states = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(net_states)
        net_states = layers.BatchNormalization()(net_states)
        net_states = layers.LeakyReLU(1e-2)(net_states)

        # Action pathway: single 400-unit layer so it can be added to the
        # state pathway's output below
        net_actions = layers.Dense(
            units=400,
            kernel_regularizer=regularizers.l2(kernel_l2_reg))(actions)
        net_actions = layers.BatchNormalization()(net_actions)
        net_actions = layers.LeakyReLU(1e-2)(net_actions)

        # Merge state and action pathways by elementwise addition
        net = layers.add([net_states, net_actions])

        net = layers.Dense(
            units=200, kernel_regularizer=regularizers.l2(kernel_l2_reg))(net)
        net = layers.BatchNormalization()(net)
        net = layers.LeakyReLU(1e-2)(net)

        # Add final output layer to produce action values (Q values):
        # a single linear unit with a small uniform init
        Q_values = layers.Dense(
            units=1,
            activation=None,
            kernel_regularizer=regularizers.l2(kernel_l2_reg),
            kernel_initializer=initializers.RandomUniform(minval=-5e-3,
                                                          maxval=5e-3),
            name='q_values')(net)

        # Create Keras model
        self.model = models.Model(inputs=[states, actions], outputs=Q_values)

        # Define optimizer and compile model for training with built-in loss function
        optimizer = optimizers.Adam(lr=1e-2)

        self.model.compile(optimizer=optimizer, loss='mse')

        # Compute action gradients (derivative of Q values w.r.t. to actions)
        action_gradients = K.gradients(Q_values, actions)

        # Define an additional function to fetch action gradients (to be used by actor model)
        self.get_action_gradients = K.function(
            inputs=[*self.model.input, K.learning_phase()],
            outputs=action_gradients)
Exemplo n.º 21
0
    def create_network(**kwargs):
        model_input = L.Input(shape=(17, 9, 9))
        print model_input

        convolution_path = L.Convolution2D(
            input_shape=(),
            filters=64,
            kernel_size=3,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(model_input)
        print convolution_path
        convolution_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(convolution_path)
        print convolution_path
        convolution_path = L.Activation('relu')(convolution_path)

        convolution_path = L.Convolution2D(
            input_shape=(),
            filters=128,
            kernel_size=3,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        print convolution_path
        convolution_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(convolution_path)
        print convolution_path
        convolution_path = L.Activation('relu')(convolution_path)

        print '------------- value -------------------'
        # policy head
        policy_path = L.Convolution2D(
            input_shape=(),
            filters=2,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        print policy_path
        policy_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(policy_path)
        policy_path = L.Activation('relu')(policy_path)
        print policy_path
        policy_path = L.Flatten()(policy_path)
        print policy_path
        policy_path = L.Dense((9 * 9) + 1,
                              kernel_regularizer=R.l2(.0001),
                              bias_regularizer=R.l2(.0001))(policy_path)
        policy_output = L.Activation('softmax')(policy_path)
        print 'policy_output', policy_output

        print '------------- policy -------------------'

        # value head
        value_path = L.Convolution2D(
            input_shape=(),
            filters=1,
            kernel_size=1,
            activation='linear',
            padding='same',
            kernel_regularizer=R.l2(.0001),
            bias_regularizer=R.l2(.0001))(convolution_path)
        print value_path
        value_path = L.BatchNormalization(
            beta_regularizer=R.l2(.0001),
            gamma_regularizer=R.l2(.0001))(value_path)
        value_path = L.Activation('relu')(value_path)
        print value_path
        value_path = L.Flatten()(value_path)
        print value_path
        value_path = L.Dense(256,
                             kernel_regularizer=R.l2(.0001),
                             bias_regularizer=R.l2(.0001))(value_path)
        print value_path
        value_path = L.Activation('relu')(value_path)
        print value_path
        value_path = L.Dense(1,
                             kernel_regularizer=R.l2(.0001),
                             bias_regularizer=R.l2(.0001))(value_path)
        print value_path
        value_output = L.Activation('tanh')(value_path)
        print value_path

        return M.Model(inputs=[model_input],
                       outputs=[policy_output, value_output])

# 92% of accuracy
sz_ly0_filters, nb_ly0_filters, nb_ly0_stride = (128,3,2)
sz_res_filters, nb_res_filters, nb_res_stages = (3,32,25)


  
    
img_input = layers.Input(shape=(32,32,3), name='cifar')

# Initial layers
x = layers.Conv2D(sz_ly0_filters, (nb_ly0_filters,nb_ly0_filters),
                  strides=(nb_ly0_stride, nb_ly0_stride), padding='same', 
                  kernel_initializer='glorot_normal',
                  kernel_regularizer=regularizers.l2(1.e-4),
                  use_bias=False, name='conv0')(img_input)

x = layers.BatchNormalization(axis=-1, name='bn0')(x)
x = layers.Activation('relu', name='relu0')(x)

# Resnet layers
for stage in range(1, nb_res_stages+1):
    x = residual_layer(x, 
                       nb_in_filters=sz_ly0_filters,
                       nb_bottleneck_filters=nb_res_filters,
                       filter_sz=sz_res_filters, 
                       stage=stage,
                       reg=0.0)

# Complete last resnet layer