Exemplo n.º 1
0
 def __init__(self, **conv_kwarg):
     """Build the basic CNN: three orthogonally-initialized conv layers,
     a flatten layer and a 256-unit dense head.

     Any extra keyword arguments are forwarded to every `conv` call.
     """
     super(Basic_CNN, self).__init__()
     # (attribute name, filters, kernel size, stride) for each conv stage.
     conv_specs = (
         ('conv_1', 32, (8, 8), 4),
         ('conv_2', 64, (4, 4), 2),
         ('conv_3', 64, (1, 1), 1),
     )
     for attr_name, n_filter, kernel, stride in conv_specs:
         setattr(self, attr_name,
                 conv(attr_name,
                      n_filter, kernel,
                      strides=stride,
                      padding='same',
                      gain=np.sqrt(2),
                      act='relu',
                      **conv_kwarg))
     self.flatten_1 = Flatten()
     self.dense_1 = Dense(256,
                          activation='relu',
                          name='fc_1',
                          kernel_initializer=Orthogonal())
Exemplo n.º 2
0
def conv(scope,
         n_filter,
         kernal_size,
         strides,
         padding='same',
         gain=1.0,
         act='relu'):
    """Build a Conv2D layer with an orthogonal kernel initializer.

    Args:
        scope: name scope the layer is created under.
        n_filter: number of convolution filters.
        kernal_size: size of the convolution kernel.
        strides: convolution strides.
        padding: padding method (default 'same').
        gain: scale factor of the orthogonal initializer.
        act: activation function name.

    Why orthogonal initialization: the eigenvalues of an orthogonal matrix
    all have absolute value 1, so - at least early in training - it helps
    avoid exploding/vanishing gradients.

    References:
    https://smerity.com/articles/2016/orthogonal_init.html
    https://hjweide.github.io/orthogonal-initialization-in-convolutional-layers
    """
    with tf.name_scope(scope):
        return Conv2D(n_filter,
                      kernal_size,
                      strides=strides,
                      padding=padding,
                      kernel_initializer=Orthogonal(gain=gain),
                      activation=act)
Exemplo n.º 3
0
 def __init__(self,
              units,
              activation=None,
              use_bias=True,
              kernel_initializer=Orthogonal(gain=1.0, seed=None),
              bias_initializer="zeros",
              kernel_regularizer=None,
              bias_regularizer=None,
              activity_regularizer=None,
              kernel_constraint=None,
              bias_constraint=None,
              k_coef_lip=1.0,
              **kwargs):
     """Construct the layer, forwarding the standard Dense options to the
     base class and recording the Lipschitz factor.

     Defaults to an orthogonal kernel initializer; `k_coef_lip` is the
     Lipschitz constant enforced via `set_klip_factor`.
     """
     # Collect the standard Dense arguments once, then forward them together
     # with any extra keyword arguments.
     dense_kwargs = dict(units=units,
                         activation=activation,
                         use_bias=use_bias,
                         kernel_initializer=kernel_initializer,
                         bias_initializer=bias_initializer,
                         kernel_regularizer=kernel_regularizer,
                         bias_regularizer=bias_regularizer,
                         activity_regularizer=activity_regularizer,
                         kernel_constraint=kernel_constraint,
                         bias_constraint=bias_constraint)
     super().__init__(**dense_kwargs, **kwargs)
     self.set_klip_factor(k_coef_lip)
     self.axis = 0
     self._kwargs = kwargs
Exemplo n.º 4
0
 def __init__(self, structure=(16, 32, 32), **conv_kwarg):
     """Build the IMPALA CNN: three conv blocks, flatten, ReLU and a
     256-unit dense head.

     Args:
         structure: filter counts for the three conv blocks.
         **conv_kwarg: extra keyword arguments forwarded to `conv_block`.
     """
     super(Impala_CNN, self).__init__()
     # A tuple default replaces the previous mutable list default
     # (shared-mutable-default pitfall); indexing behaviour is unchanged.
     self.conv_block_1 = self.conv_block(structure[0], **conv_kwarg)
     self.conv_block_2 = self.conv_block(structure[1], **conv_kwarg)
     self.conv_block_3 = self.conv_block(structure[2], **conv_kwarg)
     self.flatten_1 = Flatten()
     self.relu_1 = ReLU()
     self.dense_1 = Dense(256,
                          activation='relu',
                          kernel_initializer=Orthogonal())
Exemplo n.º 5
0
    def __init__(
            self,
            niter_spectral=3,
            base_initializer=Orthogonal(gain=1., seed=None),
    ) -> None:
        """
        Create an initializer that makes a kernel 1-Lipschitz via spectral
        normalization (the iterative power method).

        Args:
            niter_spectral: number of power-method iterations to run
            base_initializer: initializer used to draw the raw weights before
                the power method is applied
        """
        # Resolve the base initializer (accepts an identifier or an instance).
        self.base_initializer = initializers.get(base_initializer)
        self.niter_spectral = niter_spectral
        super().__init__()
Exemplo n.º 6
0
 def create_lstm_layer_1(self):
     """Create the first bidirectional LSTM layer.

     Returns full sequences; merge_mode=None keeps the forward and backward
     outputs separate.
     """
     kernel_init = glorot_uniform(seed=self.seed)
     recurrent_init = Orthogonal(seed=self.seed)
     lstm_kwargs = dict(
         input_shape=(
             self.max_sequence_length,
             self.embedding_dim,
         ),
         kernel_regularizer=None,
         recurrent_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         recurrent_dropout=self.recdrop_val,
         dropout=self.inpdrop_val,
         kernel_initializer=kernel_init,
         recurrent_initializer=recurrent_init,
         return_sequences=True,
     )
     return Bidirectional(LSTM(self.hidden_dim, **lstm_kwargs),
                          merge_mode=None)
Exemplo n.º 7
0
    def init_FFN(self):
        """Build the feed-forward sub-network inside this module's variable
        scope: the hidden ReLU layers, the output (mu) layer and, when
        residual connections are enabled, a batch-normalization layer."""
        with tf.variable_scope(self.name):
            # One he_normal-initialized ReLU layer per entry in self.Dhs.
            self.hidden_layers = [
                Dense(dim,
                      activation="relu",
                      kernel_initializer="he_normal",
                      name="hidden_{}".format(idx))
                for idx, dim in enumerate(self.Dhs)
            ]

            # A small orthogonal gain keeps the initial outputs near zero
            # when requested.
            if self.initialize_around_zero:
                mu_initializer = Orthogonal(0.01)
            else:
                mu_initializer = "he_normal"
            self.mu_layer = Dense(self.Dout,
                                  activation="linear",
                                  kernel_initializer=mu_initializer,
                                  name="mu_layer")

            if self.use_residual:
                self.batch_norm_layer = BatchNormalization()
Exemplo n.º 8
0
 def _build_tf(self, features):
     """Create the TensorFlow conv layer (defaulting to an orthogonal
     kernel initializer and 'same' padding) and expose its kernel and
     bias as babilim tensors."""
     from tensorflow.keras.layers import Conv2D as _Conv2D
     # Fill in defaults that were not supplied at construction time.
     if self.kernel_initializer is None:
         from tensorflow.keras.initializers import Orthogonal
         self.kernel_initializer = Orthogonal()
     if self.padding is None:
         self.padding = "same"
     conv_kwargs = dict(filters=self.filters,
                        kernel_size=self.kernel_size,
                        strides=self.stride,
                        dilation_rate=self.dilation_rate,
                        padding=self.padding,
                        activation=None,
                        kernel_initializer=self.kernel_initializer)
     self.conv = _Conv2D(**conv_kwargs)
     self.conv.build(features.shape)
     # Wrap the created variables so the rest of babilim can track them.
     from babilim.core.tensor_tf import Tensor as _Tensor
     self.weight = _Tensor(data=None,
                           trainable=True,
                           native=self.conv.kernel)
     self.bias = _Tensor(data=None, trainable=True, native=self.conv.bias)
Exemplo n.º 9
0
 def lstm_layer(self) -> Layer:
     """Build the recurrent layer: a concat-merged bidirectional LSTM when
     self.recurrent is "bilstm" or None, a plain LSTM when it is "lstm".
     Sequences are returned only when pooling is enabled."""
     ret_seq = bool(self.pooling)
     kernel_init = glorot_uniform(seed=self.seed)
     recurrent_init = Orthogonal(seed=self.seed)
     lstm = LSTM(self.hidden_dim,
                 input_shape=(self.max_sequence_length, self.embedding_dim,),
                 kernel_initializer=kernel_init,
                 recurrent_initializer=recurrent_init,
                 return_sequences=ret_seq)
     if self.recurrent == "bilstm" or self.recurrent is None:
         out = Bidirectional(lstm, merge_mode='concat')
     elif self.recurrent == "lstm":
         out = lstm
     return out
Exemplo n.º 10
0
 def create_lstm_layer_2(self):
     """Create the aggregation bidirectional LSTM ("sentence_embedding"):
     concatenated forward/backward final states, no sequence output."""
     kernel_init = glorot_uniform(seed=self.seed)
     recurrent_init = Orthogonal(seed=self.seed)
     lstm_kwargs = dict(
         input_shape=(
             self.max_sequence_length,
             8 * self.perspective_num,
         ),
         kernel_regularizer=None,
         recurrent_regularizer=None,
         bias_regularizer=None,
         activity_regularizer=None,
         recurrent_dropout=self.recdrop_val,
         dropout=self.inpdrop_val,
         kernel_initializer=kernel_init,
         recurrent_initializer=recurrent_init,
         return_sequences=False,
     )
     return Bidirectional(LSTM(self.aggregation_dim, **lstm_kwargs),
                          merge_mode='concat',
                          name="sentence_embedding")
Exemplo n.º 11
0
    def __init__(
            self,
            niter_spectral=3,
            niter_bjorck=15,
            base_initializer=Orthogonal(gain=1., seed=None),
    ) -> None:
        """
        Create an initializer that makes a kernel 1-Lipschitz almost
        everywhere using Bjorck orthonormalization.

        Args:
            niter_spectral: number of power-method iterations to run
            niter_bjorck: number of Bjorck-algorithm iterations to run
            base_initializer: initializer used to draw the raw weights before
                orthonormalization
        """
        # Resolve the base initializer (accepts an identifier or an instance).
        self.base_initializer = initializers.get(base_initializer)
        self.niter_bjorck = niter_bjorck
        self.niter_spectral = niter_spectral
        super().__init__()
Exemplo n.º 12
0
def getResnetModel(d):
    """Build a (real- or complex-valued) ResNet for CIFAR-10 / CIFAR-100 / SVHN.

    `d` is a configuration object; the fields read here are: num_blocks,
    start_filter, dataset, act, aact, dropout, model ("real" or "complex"),
    spectral_param, comp_init and spectral_pool_scheme.

    Returns a tf.keras Model mapping 32x32x3 images to class probabilities.
    """
    n = d.num_blocks
    sf = d.start_filter
    dataset = d.dataset
    # NOTE(review): activation, advanced_act and drop_prob are read from the
    # config but never used below (the nonlinearity is applied via Spline()).
    activation = d.act
    advanced_act = d.aact
    drop_prob = d.dropout
    inputShape = (
        32, 32, 3
    )  #(3, 32, 32) if K.image_dim_ordering() == "th" else (32, 32, 3)
    channelAxis = -1  #1 if K.image_data_format() == 'channels_first' else -1
    filsize = (3, 3)
    # Arguments shared by every convolution / batch-norm layer below.
    convArgs = {
        "padding": "same",
        "use_bias": False,
        "kernel_regularizer": l2(0.0001),
    }
    bnArgs = {"axis": channelAxis, "momentum": 0.9, "epsilon": 1e-04}

    # Real models get twice the filters (a complex filter has two components)
    # and an orthogonal initializer scaled by sqrt(2).
    if d.model == "real":
        sf *= 2
        convArgs.update({"kernel_initializer": Orthogonal(float(np.sqrt(2)))})
    elif d.model == "complex":
        convArgs.update({
            "spectral_parametrization": d.spectral_param,
            "kernel_initializer": d.comp_init
        })

    #
    # Input Layer
    #

    I = tf.keras.Input(shape=inputShape)

    #
    # Stage 1: learn an imaginary component, concatenate with the input,
    # then apply the first convolution + batch norm.
    #

    O = learnConcatRealImagBlock(I, (1, 1), (3, 3), 0, '0', convArgs, bnArgs,
                                 d)

    O = Concatenate(channelAxis)([I, O])
    if d.model == "real":
        O = Conv2D(sf, filsize, name='conv1', **convArgs)(O)
        O = BatchNormalization(name="bn_conv1_2a", **bnArgs)(O)
    else:
        O = ComplexConv2D(sf, filsize, name='conv1', **convArgs)(O)
        O = ComplexBN(name="bn_conv1_2a", **bnArgs)(O)
    O = Spline()(O)  #Activation(activation)(O)

    #
    # Stage 2: n 'regular' residual blocks at the base width.
    #

    for i in range(n):
        O = getResidualBlock(O, filsize, [sf, sf], 2, str(i), 'regular',
                             convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Stage 3: one 'projection' block, then n-1 'regular' blocks at 2x width.
    #
    O = getResidualBlock(O, filsize, [sf, sf], 3, '0', 'projection', convArgs,
                         bnArgs, d)
    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)

    for i in range(n - 1):
        O = getResidualBlock(O, filsize, [sf * 2, sf * 2], 3, str(i + 1),
                             'regular', convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Stage 4: one 'projection' block, then n-1 'regular' blocks at 4x width.
    #

    O = getResidualBlock(O, filsize, [sf * 2, sf * 2], 4, '0', 'projection',
                         convArgs, bnArgs, d)
    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)

    for i in range(n - 1):
        O = getResidualBlock(O, filsize, [sf * 4, sf * 4], 4, str(i + 1),
                             'regular', convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Pooling: spectral schemes keep full resolution (32x32 average pool),
    # otherwise the stages downsampled to 8x8.
    #

    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)
        O = AveragePooling2D(pool_size=(32, 32))(O)
    else:
        O = AveragePooling2D(pool_size=(8, 8))(O)

    #
    # Flatten
    #

    O = Flatten()(O)

    #
    # Dense softmax classifier sized for the chosen dataset.
    #

    if dataset == 'cifar10':
        O = Dense(10, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    elif dataset == 'cifar100':
        O = Dense(100, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    elif dataset == 'svhn':
        O = Dense(10, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    else:
        raise ValueError("Unknown dataset " + d.dataset)

    # Return the model
    return Model(I, O)
    def createModel(self):
        """Build, compile and train the neural model selected in the GUI.

        Reads hyperparameters from the bound Tk variables, assembles the
        architecture chosen via ``model_var`` (0=Dense, 1=Conv1D, 2=LSTM,
        3=BiLSTM, 4=SimpleRNN, 5=GRU), fits it on the lagged training data,
        and stores the trained model and its final training loss.
        """
        self.model_instance += 1
        clear_session()
        if self.checkErrors():
            return

        features, label = self.getDataset()
        X_train, y_train = self.createLag(features, label)
        X_train = X_train[:, self.lags]

        learning_rate = float(self.hyperparameters["Learning_Rate"].get())
        # BUG FIX: the Tk variable must be read with .get(); comparing the
        # variable object itself to "Adam" was always True.  Build only the
        # selected optimizer so choosing "Adam" does not require a momentum
        # value (the old dict constructed all three optimizers eagerly).
        optimizer_name = self.hyperparameters["Optimizer"].get()
        if optimizer_name == "Adam":
            optimizer = Adam(learning_rate=learning_rate)
        elif optimizer_name == "SGD":
            momentum = float(self.hyperparameters["Momentum"].get())
            optimizer = SGD(learning_rate=learning_rate, momentum=momentum)
        elif optimizer_name == "RMSprop":
            momentum = float(self.hyperparameters["Momentum"].get())
            optimizer = RMSprop(learning_rate=learning_rate, momentum=momentum)
        else:
            raise KeyError(optimizer_name)

        shape = (X_train.shape[1], X_train.shape[2])
        model_choice = self.model_var.get()

        if not self.do_optimization:
            model = Sequential()
            model.add(Input(shape=shape))

            if model_choice == 0:
                model.add(Flatten())

            # Layer classes for the recurrent architectures; choice 3 is a
            # bidirectional LSTM and gets wrapped below.
            recurrent_classes = {2: LSTM, 3: LSTM, 4: SimpleRNN, 5: GRU}
            layers = self.no_optimization_choice_var.get()
            for i in range(layers):
                neuron_number = self.neuron_numbers_var[i].get()
                activation_function = self.activation_var[i].get()
                if model_choice == 0:
                    model.add(Dense(neuron_number, activation=activation_function, kernel_initializer=GlorotUniform(seed=0)))
                    model.add(Dropout(0.2))

                elif model_choice == 1:
                    model.add(Conv1D(filters=neuron_number, kernel_size=2, activation=activation_function, kernel_initializer=GlorotUniform(seed=0)))
                    model.add(MaxPooling1D(pool_size=2))

                else:
                    # Only the last recurrent layer stops returning sequences.
                    layer = recurrent_classes[model_choice](
                        neuron_number,
                        activation=activation_function,
                        return_sequences=(i != layers - 1),
                        kernel_initializer=GlorotUniform(seed=0),
                        recurrent_initializer=Orthogonal(seed=0))
                    if model_choice == 3:
                        layer = Bidirectional(layer)
                    model.add(layer)
                    model.add(Dropout(0.2))

            if model_choice == 1:
                model.add(Flatten())
                model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))

            model.add(Dense(1, activation=self.output_activation.get(), kernel_initializer=GlorotUniform(seed=0)))
            model.compile(optimizer=optimizer, loss=self.hyperparameters["Loss_Function"].get())

            history = model.fit(X_train, y_train, epochs=self.hyperparameters["Epoch"].get(), batch_size=self.hyperparameters["Batch_Size"].get(), verbose=1, shuffle=False)
            loss = history.history["loss"][-1]
            self.train_loss.set(loss)

        # NOTE(review): `model` is unbound when self.do_optimization is True;
        # presumably an optimization branch exists elsewhere -- confirm.
        model.summary()
        self.model = model
Exemplo n.º 14
0
def get_unidirectional_LSTM_model(input_shape,
                                  lr=0.005,
                                  dropout_rate=0.2,
                                  layer_sizes=(20, 10, 10, 10),
                                  stddev=0.001,
                                  seed=0):
    '''
    Build and compile the "RippleNet" model: Gaussian noise, two Conv1D
    layers, two LSTM layers, and a time-distributed sigmoid output.

    Parameters
    ----------
    input_shape: length 2 tuple
        input dimensionality (int or None, 1)
    lr: float
        Adam optimizer learning rate
    dropout_rate: float in <0, 1>
        Dropout layer dropout fraction during training
    layer_sizes: sequence of int
        size of layers [Conv1D, Conv1D, LSTM, LSTM]
        (tuple default replaces the previous mutable list default)
    stddev: float
        standard deviation of Gaussian noise layer
    seed: int
        random seed+i for layers where i is index of layers that can be seeded
    Returns
    -------
    tf.keras Model instance
    '''
    keras.backend.clear_session()

    # input layer
    inputs = keras.layers.Input(shape=input_shape)

    # Gaussian noise layer (regularization, active only during training)
    x = keras.layers.GaussianNoise(stddev)(inputs)

    # conv layer 1 (no bias, no batch norm/activation before dropout)
    x = keras.layers.Conv1D(
        layer_sizes[0],
        kernel_size=11,
        strides=1,
        kernel_initializer=GlorotUniform(seed=seed),
        padding='same',
        use_bias=False,
    )(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 1)(x)

    # conv layer 2
    x = keras.layers.Conv1D(
        layer_sizes[1],
        kernel_size=11,
        strides=1,
        kernel_initializer=GlorotUniform(seed=seed + 2),
        padding='same',
        use_bias=True,
    )(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Activation('relu')(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 3)(x)

    # LSTM layer 1
    x = keras.layers.LSTM(
        layer_sizes[2],
        return_sequences=True,
        kernel_initializer=GlorotUniform(seed=seed + 4),
        recurrent_initializer=Orthogonal(seed=seed + 5),
    )(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 6)(x)

    # LSTM layer 2
    # NOTE(review): dropout is applied both before and after this batch norm,
    # mirroring the original network -- confirm the double dropout is intended.
    x = keras.layers.LSTM(
        layer_sizes[3],
        return_sequences=True,
        kernel_initializer=GlorotUniform(seed=seed + 7),
        recurrent_initializer=Orthogonal(seed=seed + 8),
    )(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 9)(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 10)(x)

    # dense output layer (per-timestep sigmoid probability)
    predictions = keras.layers.TimeDistributed(
        keras.layers.Dense(1,
                           activation='sigmoid',
                           kernel_initializer=GlorotUniform(seed=seed +
                                                            11)))(x)

    # Define model
    model = keras.models.Model(inputs=inputs,
                               outputs=predictions,
                               name='RippleNet')

    opt = keras.optimizers.Adam(lr=lr)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['mse'])

    return model
Exemplo n.º 15
0
    def load_weights(self, weights_as_bytes):
        """ Load weights of neural model from the binary data.

        Rebuilds the encoder and decoder inference models from scratch and
        then loads the serialized weights into them via a temporary file.

        :param weights_as_bytes: 2-element tuple of binary data (`bytes` or `bytearray` objects) containing weights of
        neural encoder and neural decoder respectively.
        """
        # Validate the container shape and element types before touching
        # the Keras session.
        if not isinstance(weights_as_bytes, tuple):
            raise ValueError(f'`weights_as_bytes` must be a 2-element tuple, not `{type(weights_as_bytes)}`!')
        if len(weights_as_bytes) != 2:
            raise ValueError(f'`weights_as_bytes` must be a 2-element tuple, but it is a {len(weights_as_bytes)}-element tuple!')
        if (not isinstance(weights_as_bytes[0], bytearray)) and (not isinstance(weights_as_bytes[0], bytes)):
            raise ValueError(f'First element of `weights_as_bytes` must be an array of bytes, not `{type(weights_as_bytes[0])}`!')
        if (not isinstance(weights_as_bytes[1], bytearray)) and (not isinstance(weights_as_bytes[1], bytes)):
            raise ValueError(f'Second element of `weights_as_bytes` must be an array of bytes, not `{type(weights_as_bytes[1])}`!')
        tmp_weights_name = self.get_temp_name()
        try:
            K.clear_session()
            # Rebuild the training-time encoder-decoder graph so the stored
            # weights match layer-for-layer.
            encoder_inputs = Input(shape=(None, len(self.input_token_index_)),
                                   name='EncoderInputs')
            encoder_mask = Masking(name='EncoderMask', mask_value=0.0)(encoder_inputs)
            encoder = LSTM(
                self.latent_dim,
                return_sequences=False, return_state=True,
                kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
                recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
                name='EncoderLSTM'
            )
            encoder_outputs, state_h, state_c = encoder(encoder_mask)
            encoder_states = [state_h, state_c]
            decoder_inputs = Input(shape=(None, len(self.target_token_index_)),
                                   name='DecoderInputs')
            decoder_mask = Masking(name='DecoderMask', mask_value=0.0)(decoder_inputs)
            decoder_lstm = LSTM(
                self.latent_dim, return_sequences=True, return_state=True,
                kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
                recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
                name='DecoderLSTM'
            )
            decoder_outputs, _, _ = decoder_lstm(decoder_mask, initial_state=encoder_states)
            decoder_dense = Dense(
                len(self.target_token_index_), activation='softmax',
                kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
                name='DecoderOutput'
            )
            # Inference-time models: the encoder maps inputs to its states,
            # the decoder steps given explicit state inputs.
            self.encoder_model_ = Model(encoder_inputs, encoder_states)
            decoder_state_input_h = Input(shape=(self.latent_dim,))
            decoder_state_input_c = Input(shape=(self.latent_dim,))
            decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
            decoder_outputs, state_h, state_c = decoder_lstm(
                decoder_mask, initial_state=decoder_states_inputs)
            decoder_states = [state_h, state_c]
            decoder_outputs = decoder_dense(decoder_outputs)
            self.decoder_model_ = Model(
                [decoder_inputs] + decoder_states_inputs,
                [decoder_outputs] + decoder_states)
            # Keras loads weights from files, so round-trip each byte blob
            # through a temporary file and remove it immediately after.
            with open(tmp_weights_name, 'wb') as fp:
                fp.write(weights_as_bytes[0])
            self.encoder_model_.load_weights(tmp_weights_name)
            os.remove(tmp_weights_name)
            with open(tmp_weights_name, 'wb') as fp:
                fp.write(weights_as_bytes[1])
            self.decoder_model_.load_weights(tmp_weights_name)
            os.remove(tmp_weights_name)
        finally:
            # Ensure the temporary file never outlives this call.
            if os.path.isfile(tmp_weights_name):
                os.remove(tmp_weights_name)
Exemplo n.º 16
0
    def fit(self, X, y, **kwargs):
        """ Fit the seq2seq model to convert sequences one to another.

        Each sequence is unicode text composed from the tokens. Tokens are separated by spaces.

        The Rectified Adam with Lookahead algorithm is used for training. To avoid overfitting,
        you must use an early stopping criterion. This criterion is included automatically
        if evaluation set is defined. You can do this in one of two ways:

        1) set a `validation_split` parameter of this object, and in this case evaluation set will be selected as a
        corresponded part of training set proportionally to the `validation_split` value;

        2) set an `eval_set` argument of this method, and then evaluation set is defined entirely by this argument.

        :param X: input texts for training.
        :param y: target texts for training.
        :param eval_set: optional argument containing input and target texts for evaluation during an early-stopping.

        :return self

        """
        self.check_params(**self.get_params(deep=False))
        self.check_X(X, 'X')
        self.check_X(y, 'y')
        if len(X) != len(y):
            raise ValueError(f'`X` does not correspond to `y`! {len(X)} != {len(y)}.')
        if 'eval_set' in kwargs:
            if (not isinstance(kwargs['eval_set'], tuple)) and (not isinstance(kwargs['eval_set'], list)):
                raise ValueError(f'`eval_set` must be `{type((1, 2))}` or `{type([1, 2])}`, not `{type(kwargs["eval_set"])}`!')
            if len(kwargs['eval_set']) != 2:
                raise ValueError(f'`eval_set` must be a two-element sequence! {len(kwargs["eval_set"])} != 2')
            self.check_X(kwargs['eval_set'][0], 'X_eval_set')
            self.check_X(kwargs['eval_set'][1], 'y_eval_set')
            if len(kwargs['eval_set'][0]) != len(kwargs['eval_set'][1]):
                raise ValueError(f'`X_eval_set` does not correspond to `y_eval_set`! '
                                 f'{len(kwargs["eval_set"][0])} != {len(kwargs["eval_set"][1])}.')
            X_eval_set = kwargs['eval_set'][0]
            y_eval_set = kwargs['eval_set'][1]
        else:
            if self.validation_split is None:
                X_eval_set = None
                y_eval_set = None
            else:
                n_eval_set = int(round(len(X) * self.validation_split))
                if n_eval_set < 1:
                    raise ValueError('`validation_split` is too small! There are no samples for evaluation!')
                if n_eval_set >= len(X):
                    raise ValueError('`validation_split` is too large! There are no samples for training!')
                X_eval_set = X[-n_eval_set:-1]
                y_eval_set = y[-n_eval_set:-1]
                X = X[:-n_eval_set]
                y = y[:-n_eval_set]
        input_characters = set()
        target_characters = set()
        max_encoder_seq_length = 0
        max_decoder_seq_length = 0
        for sample_ind in range(len(X)):
            prep = self.tokenize_text(X[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `X` is wrong! This sample is empty.')
            if n > max_encoder_seq_length:
                max_encoder_seq_length = n
            input_characters |= set(prep)
            prep = self.tokenize_text(y[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `y` is wrong! This sample is empty.')
            if (n + 2) > max_decoder_seq_length:
                max_decoder_seq_length = n + 2
            target_characters |= set(prep)
        if len(input_characters) == 0:
            raise ValueError('`X` is empty!')
        if len(target_characters) == 0:
            raise ValueError('`y` is empty!')
        input_characters_ = set()
        target_characters_ = set()
        if (X_eval_set is not None) and (y_eval_set is not None):
            for sample_ind in range(len(X_eval_set)):
                prep = self.tokenize_text(X_eval_set[sample_ind], self.lowercase)
                n = len(prep)
                if n == 0:
                    raise ValueError(f'Sample {sample_ind} of `X_eval_set` is wrong! This sample is empty.')
                if n > max_encoder_seq_length:
                    max_encoder_seq_length = n
                input_characters_ |= set(prep)
                prep = self.tokenize_text(y_eval_set[sample_ind], self.lowercase)
                n = len(prep)
                if n == 0:
                    raise ValueError(f'Sample {sample_ind} of `y_eval_set` is wrong! This sample is empty.')
                if (n + 2) > max_decoder_seq_length:
                    max_decoder_seq_length = n + 2
                target_characters_ |= set(prep)
            if len(input_characters_) == 0:
                raise ValueError('`X_eval_set` is empty!')
            if len(target_characters_) == 0:
                raise ValueError('`y_eval_set` is empty!')
        input_characters = sorted(list(input_characters | input_characters_))
        target_characters = sorted(list(target_characters | target_characters_ | {'\t', '\n'}))
        if self.verbose:
            print('')
            print(f'Number of samples for training: {len(X)}.')
            if X_eval_set is not None:
                print(f'Number of samples for evaluation and early stopping: {len(X_eval_set)}.')
            print(f'Number of unique input tokens: {len(input_characters)}.')
            print(f'Number of unique output tokens: {len(target_characters)}.')
            print(f'Max sequence length for inputs: {max_encoder_seq_length}.')
            print(f'Max sequence length for outputs: {max_decoder_seq_length}.')
            print('')
        self.input_token_index_ = dict([(char, i) for i, char in enumerate(input_characters)])
        self.target_token_index_ = dict([(char, i) for i, char in enumerate(target_characters)])
        self.max_encoder_seq_length_ = max_encoder_seq_length
        self.max_decoder_seq_length_ = max_decoder_seq_length
        K.clear_session()
        encoder_inputs = Input(shape=(None, len(self.input_token_index_)),
                               name='EncoderInputs')
        encoder_mask = Masking(name='EncoderMask', mask_value=0.0)(encoder_inputs)
        encoder = LSTM(
            self.latent_dim,
            return_sequences=False, return_state=True,
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
            name='EncoderLSTM'
        )
        encoder_outputs, state_h, state_c = encoder(encoder_mask)
        encoder_states = [state_h, state_c]
        decoder_inputs = Input(shape=(None, len(self.target_token_index_)),
                               name='DecoderInputs')
        decoder_mask = Masking(name='DecoderMask', mask_value=0.0)(decoder_inputs)
        decoder_lstm = LSTM(
            self.latent_dim,
            return_sequences=True, return_state=True,
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
            name='DecoderLSTM'
        )
        decoder_outputs, _, _ = decoder_lstm(decoder_mask, initial_state=encoder_states)
        decoder_dense = Dense(
            len(self.target_token_index_), activation='softmax',
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            name='DecoderOutput'
        )
        decoder_outputs = decoder_dense(decoder_outputs)
        model = Model([encoder_inputs, decoder_inputs], decoder_outputs,
                      name='Seq2SeqModel')
        radam = RectifiedAdam(learning_rate=self.lr, weight_decay=self.weight_decay)
        optimizer = Lookahead(radam, sync_period=6, slow_step_size=0.5)
        model.compile(optimizer=optimizer, loss='categorical_crossentropy')
        if self.verbose:
            model.summary(positions=[0.23, 0.77, 0.85, 1.0])
            print('')
        training_set_generator = TextPairSequence(
            input_texts=X, target_texts=y,
            batch_size=self.batch_size,
            max_encoder_seq_length=max_encoder_seq_length, max_decoder_seq_length=max_decoder_seq_length,
            input_token_index=self.input_token_index_, target_token_index=self.target_token_index_,
            lowercase=self.lowercase
        )
        if (X_eval_set is not None) and (y_eval_set is not None):
            evaluation_set_generator = TextPairSequence(
                input_texts=X_eval_set, target_texts=y_eval_set,
                batch_size=self.batch_size,
                max_encoder_seq_length=max_encoder_seq_length, max_decoder_seq_length=max_decoder_seq_length,
                input_token_index=self.input_token_index_, target_token_index=self.target_token_index_,
                lowercase=self.lowercase
            )
            callbacks = [
                EarlyStopping(patience=5, verbose=(1 if self.verbose else 0), monitor='val_loss')
            ]
        else:
            evaluation_set_generator = None
            callbacks = []
        tmp_weights_name = self.get_temp_name()
        try:
            callbacks.append(
                ModelCheckpoint(filepath=tmp_weights_name, verbose=(1 if self.verbose else 0), save_best_only=True,
                                save_weights_only=True,
                                monitor='loss' if evaluation_set_generator is None else 'val_loss')
            )
            model.fit_generator(
                generator=training_set_generator,
                epochs=self.epochs, verbose=(1 if self.verbose else 0),
                shuffle=True,
                validation_data=evaluation_set_generator,
                callbacks=callbacks
            )
            if os.path.isfile(tmp_weights_name):
                model.load_weights(tmp_weights_name)
        finally:
            if os.path.isfile(tmp_weights_name):
                os.remove(tmp_weights_name)
        self.encoder_model_ = Model(encoder_inputs, encoder_states)
        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_mask, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        self.decoder_model_ = Model(
            [decoder_inputs] + decoder_states_inputs,
            [decoder_outputs] + decoder_states)
        self.reverse_target_char_index_ = dict(
            (i, char) for char, i in self.target_token_index_.items())
        return self
Exemplo n.º 17
0
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.initializers import Constant, Orthogonal
from tensorflow.keras.regularizers import l2

def orth_weights_initializer(scale):
    """Return an Orthogonal kernel initializer with the given gain/scale."""
    return Orthogonal(gain=scale)


def const_bias_initializer(value):
    """Return a Constant bias initializer with the given value."""
    return Constant(value=value)


def l2_regularizer(base_val=1.0):
    """Return an L2 weight regularizer with coefficient ``base_val``.

    NOTE(review): ``l=`` is the legacy tf.keras argument name (newer
    versions spell it ``l2=``) — kept as-is to preserve behavior on the
    TF version this code targets; confirm against the pinned TF release.
    """
    return l2(l=base_val)

# BN + dropout only for offline algorithms, see Liu et al Feb. 2020


def _mlp_actor_net_orth(hidden_layers=[64, 64]):
    def network_func(input_dim, output_dim):
        state = Input(shape=input_dim)

        x = state
        for i in range(len(hidden_layers)):
            x = Dense(hidden_layers[i],
                      activation='tanh',
                      kernel_initializer=orth_weights_initializer(np.sqrt(2)),
                      bias_initializer=const_bias_initializer(0.0),
                      kernel_regularizer=l2_regularizer())(x)

        action_mu = Dense(output_dim,
                          activation='tanh',
                          kernel_initializer=orth_weights_initializer(0.01),
Exemplo n.º 18
0
    Dense(units=64,
          kernel_initializer=HeUniform(),
          bias_initializer=Ones(),
          activation=relu)
])

model.summary()

# Hidden layer: kernels drawn from N(0, 0.005), small constant bias, ELU.
# NOTE(review): the layer name says 0.0005 but stddev is 0.005 — one of the
# two is likely a typo; confirm which value was intended.
model.add(Dense(units=64,
                kernel_initializer=RandomNormal(mean=0., stddev=0.005),
                bias_initializer=Constant(value=0.03),
                activation=elu,
                name='rand_norm_0.0005'))

# Output-sized layer: orthogonal kernels scaled by gain=0.9, SELU activation.
model.add(Dense(units=8,
                kernel_initializer=Orthogonal(gain=0.9),
                bias_initializer=Constant(value=0.04),
                activation=selu))

model.summary()


def my_custom_initializer(shape, dtype=None):
    """Kernel initializer: draw weights of the given shape from a standard normal."""
    weights = K.random_normal(shape, dtype=dtype)
    return weights


# A kernel_initializer can be any callable (shape, dtype) -> tensor.
model.add(Dense(units=64, kernel_initializer=my_custom_initializer, activation=selu))

model.summary()

# 5x2 grid of axes, presumably for plotting per-layer weight/bias histograms.
# NOTE(review): assumes matplotlib.pyplot is imported as `plt` elsewhere.
fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 16))
Exemplo n.º 19
0
 def __init__(self, eps_std=0.05, seed=None):
     """Store the configuration and a default Orthogonal base initializer.

     eps_std and seed are kept as given; seed is not forwarded to the
     Orthogonal instance here.
     """
     self.seed = seed
     self.eps_std = eps_std
     self.orthogonal = Orthogonal()
Exemplo n.º 20
0
    def createModel(self):
        """Build (and optionally hyperparameter-tune) the model selected in
        the GUI, train it on the lagged dataset, and store it.

        Side effects: increments ``self.model_instance``, clears the Keras
        session, sets ``self.train_loss`` (and ``self.best_model_neurons``
        when tuning), and assigns the fitted model to ``self.model``.
        """
        self.model_instance += 1
        # Drop any graph/state left over from a previously created model.
        clear_session()

        features, label = self.getDataset()
        X_train, y_train = self.createLag(features, label)
        # Keep only the user-selected lag columns.
        X_train = X_train[:, self.lags]

        # Optimizer settings come from the GUI widgets (tkinter Variables).
        learning_rate = float(self.hyperparameters["Learning_Rate"].get())
        momentum = float(self.hyperparameters["Momentum"].get())
        optimizers = {
                "Adam": Adam(learning_rate=learning_rate),
                "SGD": SGD(learning_rate=learning_rate, momentum=momentum),
                "RMSprop": RMSprop(learning_rate=learning_rate, momentum=momentum)
                }

        # (timesteps, features) input shape shared by all architectures below.
        shape = (X_train.shape[1], X_train.shape[2])
        # 0=MLP, 1=CNN, 2=LSTM, 3=Bi-LSTM, 4=SimpleRNN, 5=GRU — inferred from
        # the branches below; confirm against the GUI's model list.
        model_choice = self.model_var.get()

        if not self.do_optimization:
            # Fixed architecture: layer sizes/activations read from the GUI.
            model = Sequential()
            model.add(Input(shape=shape))

            # MLP works on flat vectors, so flatten the (lags, features) input.
            if model_choice == 0:
                model.add(Flatten())

            layers = self.no_optimization_choice_var.get()
            for i in range(layers):
                neuron_number = self.neuron_numbers_var[i].get()
                activation_function = self.activation_var[i].get()
                if model_choice == 0:
                    model.add(Dense(neuron_number, activation=activation_function, kernel_initializer=GlorotUniform(seed=0)))

                elif model_choice == 1:
                    model.add(Conv1D(filters=neuron_number, kernel_size=2, activation=activation_function, kernel_initializer=GlorotUniform(seed=0)))
                    model.add(MaxPooling1D(pool_size=2))

                # Recurrent variants: only the last layer returns a single
                # vector (return_sequences=False); seeded initializers keep
                # runs reproducible.
                elif model_choice == 2:
                    if i == layers-1:
                        model.add(LSTM(neuron_number, activation=activation_function, return_sequences=False, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))
                    else:
                        model.add(LSTM(neuron_number, activation=activation_function, return_sequences=True, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))

                elif model_choice == 3:
                    if i == layers-1:
                        model.add(Bidirectional(LSTM(neuron_number, activation=activation_function, return_sequences=False, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0))))
                        model.add(Dropout(0.2))
                    else:
                        model.add(Bidirectional(LSTM(neuron_number, activation=activation_function, return_sequences=True, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0))))
                        model.add(Dropout(0.2))

                elif model_choice == 4:
                    if i == layers-1:
                        model.add(SimpleRNN(neuron_number, activation=activation_function, return_sequences=False, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))
                    else:
                        model.add(SimpleRNN(neuron_number, activation=activation_function, return_sequences=True, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))

                elif model_choice == 5:
                    if i == layers-1:
                        model.add(GRU(neuron_number, activation=activation_function, return_sequences=False, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))
                    else:
                        model.add(GRU(neuron_number, activation=activation_function, return_sequences=True, kernel_initializer=GlorotUniform(seed=0), recurrent_initializer=Orthogonal(seed=0)))
                        model.add(Dropout(0.2))

            # CNN head: flatten conv features and add a small Dense bridge.
            if model_choice == 1:
                model.add(Flatten())
                model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))

            # Single-unit regression output.
            model.add(Dense(1, activation=self.output_activation.get(), kernel_initializer=GlorotUniform(seed=0)))
            model.compile(optimizer = optimizers[self.hyperparameters["Optimizer"].get()], loss=self.hyperparameters["Loss_Function"].get())

            # shuffle=False preserves temporal order of the series.
            history = model.fit(X_train, y_train, epochs=self.hyperparameters["Epoch"].get(), batch_size=self.hyperparameters["Batch_Size"].get(), verbose=1, shuffle=False)
            # NOTE(review): this reports the LAST epoch's loss, not the best.
            loss = history.history["loss"][-1]
            self.train_loss.set(loss)

        elif self.do_optimization:
            # Hyperparameter search over layer widths with keras-tuner.
            layer = self.optimization_choice_var.get()

            if model_choice == 0:
                def build_model(hp):
                    model = Sequential()
                    model.add(Input(shape=shape))
                    model.add(Flatten())
                    for i in range(layer):
                        n_min = self.neuron_min_number_var[i].get()
                        n_max = self.neuron_max_number_var[i].get()
                        # NOTE(review): step is 0 when n_max - n_min < 4;
                        # keras-tuner's hp.Int requires step >= 1 — confirm
                        # the GUI constrains the min/max inputs.
                        step = int((n_max - n_min)/4)
                        model.add(Dense(units=hp.Int('MLP_'+str(i), min_value=n_min, max_value=n_max, step=step), activation='relu'))
                    model.add(Dense(1))
                    model.compile(optimizer = optimizers[self.hyperparameters["Optimizer"].get()], loss=self.hyperparameters["Loss_Function"].get())
                    return model

                name = str(self.model_instance) + ". MLP"

            elif model_choice == 1:
                def build_model(hp):
                    model = Sequential()
                    model.add(Input(shape=shape))
                    for i in range(layer):
                        n_min = self.neuron_min_number_var[i].get()
                        n_max = self.neuron_max_number_var[i].get()
                        step = int((n_max-n_min)/4)
                        model.add(Conv1D(filters=hp.Int("CNN_"+str(i), min_value=n_min, max_value=n_max, step=step), kernel_size=2, activation="relu", kernel_initializer=GlorotUniform(seed=0)))
                        model.add(MaxPooling1D(pool_size=2))

                    model.add(Flatten())
                    model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))
                    model.add(Dense(1, kernel_initializer=GlorotUniform(seed=0)))
                    model.compile(optimizer = optimizers[self.hyperparameters["Optimizer"].get()], loss=self.hyperparameters["Loss_Function"].get())
                    return model

                name = str(self.model_instance) + ". CNN"

            elif model_choice == 2:
                def build_model(hp):
                    model = Sequential()
                    model.add(Input(shape=shape))
                    for i in range(layer):
                        n_min = self.neuron_min_number_var[i].get()
                        n_max = self.neuron_max_number_var[i].get()
                        step = int((n_max - n_min)/4)
                        # NOTE(review): when i == layer-1 this still adds the
                        # return_sequences=True layer AND then a second LSTM
                        # with the SAME hp name "LSTM_<i>" below, so the tuned
                        # stack is one layer deeper than the non-tuned path.
                        # Probably intended as if/else like the branch above.
                        model.add(LSTM(units=hp.Int("LSTM_"+str(i), min_value=n_min, max_value=n_max, step=step), activation='relu', return_sequences=True, kernel_initializer=GlorotUniform(seed=0)))
                        if i == layer-1:
                            model.add(LSTM(units=hp.Int("LSTM_"+str(i), min_value=n_min, max_value=n_max, step=step), activation='relu', return_sequences=False, kernel_initializer=GlorotUniform(seed=0)))

                    model.add(Dense(1))
                    model.compile(optimizer = optimizers[self.hyperparameters["Optimizer"].get()], loss=self.hyperparameters["Loss_Function"].get())
                    return model

                name = str(self.model_instance) + ". LSTM"

            elif model_choice == 3:
                def build_model(hp):
                    model = Sequential()
                    model.add(Input(shape=shape))
                    for i in range(layer):
                        n_min = self.neuron_min_number_var[i].get()
                        n_max = self.neuron_max_number_var[i].get()
                        step = int((n_max - n_min)/4)
                        # NOTE(review): same duplicated-final-layer / reused hp
                        # name issue as the LSTM branch above.
                        model.add(Bidirectional(LSTM(units=hp.Int("LSTM_"+str(i), min_value=n_min, max_value=n_max, step=step), activation='relu', return_sequences=True, kernel_initializer=GlorotUniform(seed=0))))
                        if i == layer-1:
                            model.add(Bidirectional(LSTM(units=hp.Int("LSTM_"+str(i), min_value=n_min, max_value=n_max, step=step), activation='relu', return_sequences=False, kernel_initializer=GlorotUniform(seed=0))))

                    model.add(Dense(1, kernel_initializer=GlorotUniform(seed=0)))
                    model.compile(optimizer = optimizers[self.hyperparameters["Optimizer"].get()], loss=self.hyperparameters["Loss_Function"].get())
                    return model

                name = str(self.model_instance) + ". Bi-LSTM"

            # NOTE(review): there is no build_model (nor name) for
            # model_choice 4 or 5 (SimpleRNN/GRU) — reaching here with those
            # selections raises NameError on the line below.
            tuner = RandomSearch(build_model, objective='loss', max_trials=25, executions_per_trial=2, directory=self.runtime, project_name=name)

            tuner.search(X_train, y_train, epochs=self.hyperparameters["Epoch"].get(), batch_size=self.hyperparameters["Batch_Size"].get())
            hps = tuner.get_best_hyperparameters(num_trials = 1)[0]
            # Rebuild the best model and retrain it from scratch.
            model = tuner.hypermodel.build(hps)

            history = model.fit(X_train, y_train, epochs=self.hyperparameters["Epoch"].get(), batch_size=self.hyperparameters["Batch_Size"].get(), verbose=1)
            # NOTE(review): last-epoch loss again, not the best observed.
            loss = history.history["loss"][-1]
            self.train_loss.set(loss)


            # Report the tuned layer widths back to the GUI. Layer indices
            # depend on the exact stacking in each build_model above (e.g.
            # MLP skips the Flatten at index 0; CNN interleaves pooling).
            for i in range(layer):
                if model_choice == 0:
                    self.best_model_neurons[i].set(model.get_layer(index=i+1).get_config()["units"])
                elif model_choice == 1:
                    self.best_model_neurons[i].set(model.get_layer(index=(2*i)).get_config()["filters"])
                elif model_choice == 2:
                    self.best_model_neurons[i].set(model.get_layer(index=i).get_config()["units"])
                elif model_choice == 3:
                    # Bidirectional wraps the LSTM, so units live one level
                    # deeper in the layer config.
                    self.best_model_neurons[i].set(model.get_layer(index=i).get_config()["layer"]["config"]["units"])
        model.summary()
        self.model = model
Exemplo n.º 21
0
from arena5.algos.hppo.GAE import GAE
import tensorflow as tf
from stable_baselines.common.policies import MlpPolicy, CnnPolicy
from stable_baselines.common import tf_util, zipsame
from stable_baselines.common.distributions import DiagGaussianProbabilityDistribution
from tensorflow.keras.layers import Lambda, Input, LSTM, Dense, Reshape, Flatten, multiply, RepeatVector, Permute
from tensorflow.keras.initializers import Orthogonal
from stable_baselines.common import Dataset

from tensorflow.keras import backend as K
from stable_baselines.common.mpi_adam import MpiAdam
from stable_baselines.common import tf_util, zipsame
from tensorflow.keras.backend import set_session
from stable_baselines import logger

# Orthogonal initializer with gain 0.01 — presumably to keep initial policy
# outputs near zero (common in PPO-style policies).
ORTHO_01 = Orthogonal(0.01)

# Silence TensorFlow C++ logging (3 = errors only).
# NOTE(review): `os` is not imported in this chunk's visible imports — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


class HPPOPolicy():
    def __init__(self, env, policy_comm, **kwargs):

        # Pull params out of kwargs
        self.params = kwargs['params']

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
        config.log_device_placement = True  # to log device placement (on which device the operation ran)
        sess = tf.Session(config=config)
        set_session(