def __init__(self, **conv_kwarg):
    super(Basic_CNN, self).__init__()
    self.conv_1 = conv('conv_1', 32, (8, 8), strides=4, padding='same',
                       gain=np.sqrt(2), act='relu', **conv_kwarg)
    self.conv_2 = conv('conv_2', 64, (4, 4), strides=2, padding='same',
                       gain=np.sqrt(2), act='relu', **conv_kwarg)
    self.conv_3 = conv('conv_3', 64, (1, 1), strides=1, padding='same',
                       gain=np.sqrt(2), act='relu', **conv_kwarg)
    self.flatten_1 = Flatten()
    self.dense_1 = Dense(256, activation='relu', name='fc_1',
                         kernel_initializer=Orthogonal())
def conv(scope, n_filter, kernel_size, strides, padding='same', gain=1.0, act='relu'):
    """Convolution layer with an orthogonal initializer.

    Params:
        scope: name scope
        n_filter: number of filters
        kernel_size: size of the conv kernel
        strides: strides
        padding: padding method
        gain: scale factor of the orthogonal initializer
        act: activation function

    Why orthogonal initialization is used: the eigenvalues of an orthogonal
    matrix all have absolute value 1, so, at least in the early stages of
    training, it helps avoid exploding/vanishing gradients.

    References:
        https://smerity.com/articles/2016/orthogonal_init.html
        https://hjweide.github.io/orthogonal-initialization-in-convolutional-layers
    """
    with tf.name_scope(scope):
        layer = Conv2D(n_filter, kernel_size, strides=strides, padding=padding,
                       kernel_initializer=Orthogonal(gain=gain), activation=act)
    return layer
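# A minimal verification sketch (added; not part of the snippet above): a
# matrix drawn from Orthogonal() has every singular value equal to 1 (times
# `gain`), which is the property the docstring credits with keeping gradients
# from exploding or vanishing early in training.
import numpy as np
import tensorflow as tf

w = tf.keras.initializers.Orthogonal(gain=1.0)(shape=(64, 64)).numpy()
print(np.allclose(np.linalg.svd(w, compute_uv=False), 1.0))  # True: all singular values are 1
print(np.allclose(w.T @ w, np.eye(64), atol=1e-5))           # True: W^T W = I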
def __init__(self, units, activation=None, use_bias=True,
             kernel_initializer=Orthogonal(gain=1.0, seed=None),
             bias_initializer="zeros", kernel_regularizer=None,
             bias_regularizer=None, activity_regularizer=None,
             kernel_constraint=None, bias_constraint=None,
             k_coef_lip=1.0, **kwargs):
    super().__init__(units=units, activation=activation, use_bias=use_bias,
                     kernel_initializer=kernel_initializer,
                     bias_initializer=bias_initializer,
                     kernel_regularizer=kernel_regularizer,
                     bias_regularizer=bias_regularizer,
                     activity_regularizer=activity_regularizer,
                     kernel_constraint=kernel_constraint,
                     bias_constraint=bias_constraint, **kwargs)
    self.set_klip_factor(k_coef_lip)
    self.axis = 0
    self._kwargs = kwargs
def __init__(self, structure=[16, 32, 32], **conv_kwarg):
    super(Impala_CNN, self).__init__()
    self.conv_block_1 = self.conv_block(structure[0], **conv_kwarg)
    self.conv_block_2 = self.conv_block(structure[1], **conv_kwarg)
    self.conv_block_3 = self.conv_block(structure[2], **conv_kwarg)
    self.flatten_1 = Flatten()
    self.relu_1 = ReLU()
    self.dense_1 = Dense(256, activation='relu', kernel_initializer=Orthogonal())
def __init__(
    self,
    niter_spectral=3,
    base_initializer=Orthogonal(gain=1., seed=None),
) -> None:
    """
    Initialize a kernel to be 1-Lipschitz using spectral normalization
    (iterative power method).

    Args:
        niter_spectral: number of iterations of the iterative power method
        base_initializer: method used to generate weights before applying
            the iterative power method
    """
    self.niter_spectral = niter_spectral
    self.base_initializer = initializers.get(base_initializer)
    super(SpectralInitializer, self).__init__()
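# A rough sketch (added; not from the source) of the iterative power method
# the docstring refers to: estimate the largest singular value of W, then
# rescale W by it so the resulting kernel is (approximately) 1-Lipschitz.
import numpy as np

def spectral_normalize(w, niter_spectral=3):
    u = np.random.randn(w.shape[0])
    v = None
    for _ in range(niter_spectral):
        v = w.T @ u
        v /= np.linalg.norm(v)
        u = w @ v
        u /= np.linalg.norm(u)
    sigma = u @ w @ v  # Rayleigh-quotient estimate of the top singular value
    return w / sigma

w = spectral_normalize(np.random.randn(64, 32))
print(np.linalg.norm(w, 2))  # ~1.0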
def create_lstm_layer_1(self):
    ker_in = glorot_uniform(seed=self.seed)
    rec_in = Orthogonal(seed=self.seed)
    bioutp = Bidirectional(LSTM(self.hidden_dim,
                                input_shape=(self.max_sequence_length, self.embedding_dim,),
                                kernel_regularizer=None,
                                recurrent_regularizer=None,
                                bias_regularizer=None,
                                activity_regularizer=None,
                                recurrent_dropout=self.recdrop_val,
                                dropout=self.inpdrop_val,
                                kernel_initializer=ker_in,
                                recurrent_initializer=rec_in,
                                return_sequences=True),
                           merge_mode=None)
    return bioutp
def init_FFN(self):
    with tf.variable_scope(self.name):
        self.hidden_layers = []
        for i, Dh in enumerate(self.Dhs):
            self.hidden_layers.append(
                Dense(Dh, activation="relu",
                      kernel_initializer="he_normal",
                      name="hidden_{}".format(i)))
        mu_initializer = Orthogonal(0.01) if self.initialize_around_zero else "he_normal"
        self.mu_layer = Dense(self.Dout, activation="linear",
                              kernel_initializer=mu_initializer,
                              name="mu_layer")
        if self.use_residual:
            self.batch_norm_layer = BatchNormalization()
def _build_tf(self, features):
    from tensorflow.keras.layers import Conv2D as _Conv2D
    if self.kernel_initializer is None:
        from tensorflow.keras.initializers import Orthogonal
        self.kernel_initializer = Orthogonal()
    if self.padding is None:
        self.padding = "same"
    self.conv = _Conv2D(filters=self.filters, kernel_size=self.kernel_size,
                        strides=self.stride, dilation_rate=self.dilation_rate,
                        padding=self.padding, activation=None,
                        kernel_initializer=self.kernel_initializer)
    self.conv.build(features.shape)
    from babilim.core.tensor_tf import Tensor as _Tensor
    self.weight = _Tensor(data=None, trainable=True, native=self.conv.kernel)
    self.bias = _Tensor(data=None, trainable=True, native=self.conv.bias)
def lstm_layer(self) -> Layer:
    ret_seq = bool(self.pooling)
    ker_in = glorot_uniform(seed=self.seed)
    rec_in = Orthogonal(seed=self.seed)
    if self.recurrent == "bilstm" or self.recurrent is None:
        out = Bidirectional(LSTM(self.hidden_dim,
                                 input_shape=(self.max_sequence_length, self.embedding_dim,),
                                 kernel_initializer=ker_in,
                                 recurrent_initializer=rec_in,
                                 return_sequences=ret_seq),
                            merge_mode='concat')
    elif self.recurrent == "lstm":
        out = LSTM(self.hidden_dim,
                   input_shape=(self.max_sequence_length, self.embedding_dim,),
                   kernel_initializer=ker_in,
                   recurrent_initializer=rec_in,
                   return_sequences=ret_seq)
    else:
        # Any other value would have left `out` unbound; fail explicitly.
        raise ValueError(f"Unsupported recurrent layer type: {self.recurrent!r}")
    return out
def create_lstm_layer_2(self):
    ker_in = glorot_uniform(seed=self.seed)
    rec_in = Orthogonal(seed=self.seed)
    bioutp = Bidirectional(LSTM(self.aggregation_dim,
                                input_shape=(self.max_sequence_length, 8 * self.perspective_num,),
                                kernel_regularizer=None,
                                recurrent_regularizer=None,
                                bias_regularizer=None,
                                activity_regularizer=None,
                                recurrent_dropout=self.recdrop_val,
                                dropout=self.inpdrop_val,
                                kernel_initializer=ker_in,
                                recurrent_initializer=rec_in,
                                return_sequences=False),
                           merge_mode='concat',
                           name="sentence_embedding")
    return bioutp
def __init__(
    self,
    niter_spectral=3,
    niter_bjorck=15,
    base_initializer=Orthogonal(gain=1., seed=None),
) -> None:
    """
    Initialize a kernel to be 1-Lipschitz almost everywhere using Bjorck
    normalization.

    Args:
        niter_spectral: number of iterations of the iterative power method
        niter_bjorck: number of iterations of the Bjorck algorithm
        base_initializer: method used to generate weights before applying
            the orthonormalization
    """
    self.niter_spectral = niter_spectral
    self.niter_bjorck = niter_bjorck
    self.base_initializer = initializers.get(base_initializer)
    super(BjorckInitializer, self).__init__()
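# A rough sketch (added; not from the source) of the classic Bjorck & Bowie
# orthonormalization step the docstring refers to:
#     W <- 1.5 * W - 0.5 * W W^T W
# which drives every singular value of W toward 1 provided the spectral norm
# of the starting matrix is at most 1 (hence the spectral step runs first).
import numpy as np

def bjorck_orthonormalize(w, niter_bjorck=15):
    for _ in range(niter_bjorck):
        w = 1.5 * w - 0.5 * w @ w.T @ w
    return w

w0 = np.random.randn(32, 32)
w0 /= np.linalg.norm(w0, 2)                # spectral normalization first
w = bjorck_orthonormalize(w0)
print(np.abs(w.T @ w - np.eye(32)).max())  # ~0: W is now (nearly) orthonormal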
def getResnetModel(d):
    n = d.num_blocks
    sf = d.start_filter
    dataset = d.dataset
    activation = d.act
    advanced_act = d.aact
    drop_prob = d.dropout
    inputShape = (32, 32, 3)  # (3, 32, 32) if K.image_dim_ordering() == "th" else (32, 32, 3)
    channelAxis = -1  # 1 if K.image_data_format() == 'channels_first' else -1
    filsize = (3, 3)
    convArgs = {
        "padding": "same",
        "use_bias": False,
        "kernel_regularizer": l2(0.0001),
    }
    bnArgs = {"axis": channelAxis, "momentum": 0.9, "epsilon": 1e-04}

    if d.model == "real":
        sf *= 2
        convArgs.update({"kernel_initializer": Orthogonal(float(np.sqrt(2)))})
    elif d.model == "complex":
        convArgs.update({
            "spectral_parametrization": d.spectral_param,
            "kernel_initializer": d.comp_init
        })

    #
    # Input Layer
    #
    I = tf.keras.Input(shape=inputShape)

    #
    # Stage 1
    #
    O = learnConcatRealImagBlock(I, (1, 1), (3, 3), 0, '0', convArgs, bnArgs, d)
    O = Concatenate(channelAxis)([I, O])
    if d.model == "real":
        O = Conv2D(sf, filsize, name='conv1', **convArgs)(O)
        O = BatchNormalization(name="bn_conv1_2a", **bnArgs)(O)
    else:
        O = ComplexConv2D(sf, filsize, name='conv1', **convArgs)(O)
        O = ComplexBN(name="bn_conv1_2a", **bnArgs)(O)
    O = Spline()(O)  # Activation(activation)(O)

    #
    # Stage 2
    #
    for i in range(n):
        O = getResidualBlock(O, filsize, [sf, sf], 2, str(i), 'regular', convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Stage 3
    #
    O = getResidualBlock(O, filsize, [sf, sf], 3, '0', 'projection', convArgs, bnArgs, d)
    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)
    for i in range(n - 1):
        O = getResidualBlock(O, filsize, [sf * 2, sf * 2], 3, str(i + 1), 'regular', convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Stage 4
    #
    O = getResidualBlock(O, filsize, [sf * 2, sf * 2], 4, '0', 'projection', convArgs, bnArgs, d)
    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)
    for i in range(n - 1):
        O = getResidualBlock(O, filsize, [sf * 4, sf * 4], 4, str(i + 1), 'regular', convArgs, bnArgs, d)
        if i == n // 2 and d.spectral_pool_scheme == "stagemiddle":
            O = applySpectralPooling(O, d)

    #
    # Pooling
    #
    if d.spectral_pool_scheme == "nodownsample":
        O = applySpectralPooling(O, d)
        O = AveragePooling2D(pool_size=(32, 32))(O)
    else:
        O = AveragePooling2D(pool_size=(8, 8))(O)

    #
    # Flatten
    #
    O = Flatten()(O)

    #
    # Dense
    #
    if dataset == 'cifar10':
        O = Dense(10, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    elif dataset == 'cifar100':
        O = Dense(100, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    elif dataset == 'svhn':
        O = Dense(10, activation='softmax', kernel_regularizer=l2(0.0001))(O)
    else:
        raise ValueError("Unknown dataset " + d.dataset)

    # Return the model
    return Model(I, O)
def createModel(self):
    self.model_instance += 1
    clear_session()
    if self.checkErrors():
        return
    features, label = self.getDataset()
    X_train, y_train = self.createLag(features, label)
    X_train = X_train[:, self.lags]
    learning_rate = float(self.hyperparameters["Learning_Rate"].get())
    # Momentum is only configurable for SGD and RMSprop; default it to 0.0 so
    # the optimizer table below can still be built when Adam is selected.
    # (The original compared the variable object itself to "Adam" instead of
    # calling .get().)
    if self.hyperparameters["Optimizer"].get() != "Adam":
        momentum = float(self.hyperparameters["Momentum"].get())
    else:
        momentum = 0.0
    optimizers = {
        "Adam": Adam(learning_rate=learning_rate),
        "SGD": SGD(learning_rate=learning_rate, momentum=momentum),
        "RMSprop": RMSprop(learning_rate=learning_rate, momentum=momentum)
    }
    shape = (X_train.shape[1], X_train.shape[2])
    model_choice = self.model_var.get()

    if not self.do_optimization:
        model = Sequential()
        model.add(Input(shape=shape))
        if model_choice == 0:
            model.add(Flatten())
        layers = self.no_optimization_choice_var.get()
        for i in range(layers):
            neuron_number = self.neuron_numbers_var[i].get()
            activation_function = self.activation_var[i].get()
            if model_choice == 0:
                model.add(Dense(neuron_number, activation=activation_function,
                                kernel_initializer=GlorotUniform(seed=0)))
                model.add(Dropout(0.2))
            elif model_choice == 1:
                model.add(Conv1D(filters=neuron_number, kernel_size=2,
                                 activation=activation_function,
                                 kernel_initializer=GlorotUniform(seed=0)))
                model.add(MaxPooling1D(pool_size=2))
            else:
                # Recurrent variants (2: LSTM, 3: Bi-LSTM, 4: SimpleRNN, 5: GRU)
                # share one keyword set; only the last recurrent layer
                # collapses the sequence to a vector.
                rnn_kwargs = dict(activation=activation_function,
                                  return_sequences=(i != layers - 1),
                                  kernel_initializer=GlorotUniform(seed=0),
                                  recurrent_initializer=Orthogonal(seed=0))
                if model_choice == 2:
                    model.add(LSTM(neuron_number, **rnn_kwargs))
                elif model_choice == 3:
                    model.add(Bidirectional(LSTM(neuron_number, **rnn_kwargs)))
                elif model_choice == 4:
                    model.add(SimpleRNN(neuron_number, **rnn_kwargs))
                elif model_choice == 5:
                    model.add(GRU(neuron_number, **rnn_kwargs))
                model.add(Dropout(0.2))
        if model_choice == 1:
            model.add(Flatten())
            model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))
        model.add(Dense(1, activation=self.output_activation.get(),
                        kernel_initializer=GlorotUniform(seed=0)))
        model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                      loss=self.hyperparameters["Loss_Function"].get())
        history = model.fit(X_train, y_train,
                            epochs=self.hyperparameters["Epoch"].get(),
                            batch_size=self.hyperparameters["Batch_Size"].get(),
                            verbose=1, shuffle=False)
        loss = history.history["loss"][-1]
        self.train_loss.set(loss)
        model.summary()
        self.model = model
def get_unidirectional_LSTM_model(input_shape, lr=0.005, dropout_rate=0.2,
                                  layer_sizes=[20, 10, 10, 10], stddev=0.001, seed=0):
    '''
    Parameters
    ----------
    input_shape: length-2 tuple
        input dimensionality (int or None, 1)
    lr: float
        Adam optimizer learning rate
    dropout_rate: float in <0, 1>
        Dropout layer dropout fraction during training
    layer_sizes: list of int
        size of layers [Conv1D, Conv1D, LSTM, LSTM]
    stddev: float
        standard deviation of the Gaussian noise layer
    seed: int
        random seed + i for layers, where i is the index of each layer
        that can be seeded

    Returns
    -------
    tf.keras Model instance
    '''
    keras.backend.clear_session()

    # input layer
    inputs = keras.layers.Input(shape=input_shape)

    # Gaussian noise layer
    x = keras.layers.GaussianNoise(stddev)(inputs)

    # conv layer 1
    x = keras.layers.Conv1D(
        layer_sizes[0],
        kernel_size=11,
        strides=1,
        kernel_initializer=GlorotUniform(seed=seed),
        padding='same',
        use_bias=False,
    )(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 1)(x)

    # conv layer 2
    x = keras.layers.Conv1D(
        layer_sizes[1],
        kernel_size=11,
        strides=1,
        kernel_initializer=GlorotUniform(seed=seed + 2),
        padding='same',
        use_bias=True,
    )(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Activation('relu')(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 3)(x)

    # LSTM layer 1
    x = keras.layers.LSTM(
        layer_sizes[2],
        return_sequences=True,
        kernel_initializer=GlorotUniform(seed=seed + 4),
        recurrent_initializer=Orthogonal(seed=seed + 5),
    )(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 6)(x)

    # LSTM layer 2
    x = keras.layers.LSTM(
        layer_sizes[3],
        return_sequences=True,
        kernel_initializer=GlorotUniform(seed=seed + 7),
        recurrent_initializer=Orthogonal(seed=seed + 8),
    )(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 9)(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.Dropout(dropout_rate, seed=seed + 10)(x)

    # dense output layer
    predictions = keras.layers.TimeDistributed(
        keras.layers.Dense(1, activation='sigmoid',
                           kernel_initializer=GlorotUniform(seed=seed + 11)))(x)

    # Define model
    model = keras.models.Model(inputs=inputs, outputs=predictions, name='RippleNet')
    opt = keras.optimizers.Adam(lr=lr)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['mse'])

    return model
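# A minimal usage sketch (added; the window length 1250 is an assumption, not
# from the source): build the model for fixed-length windows of a univariate
# signal and inspect the layer stack.
model = get_unidirectional_LSTM_model(input_shape=(1250, 1))
model.summary()  # Conv1D -> Conv1D -> LSTM -> LSTM -> TimeDistributed(Dense)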
def load_weights(self, weights_as_bytes):
    """ Load the weights of the neural model from binary data.

    :param weights_as_bytes: 2-element tuple of binary data (`bytes` or
        `bytearray` objects) containing the weights of the neural encoder
        and the neural decoder, respectively.
    """
    if not isinstance(weights_as_bytes, tuple):
        raise ValueError(f'`weights_as_bytes` must be a 2-element tuple, not `{type(weights_as_bytes)}`!')
    if len(weights_as_bytes) != 2:
        raise ValueError(f'`weights_as_bytes` must be a 2-element tuple, but it is a '
                         f'{len(weights_as_bytes)}-element tuple!')
    if (not isinstance(weights_as_bytes[0], bytearray)) and (not isinstance(weights_as_bytes[0], bytes)):
        raise ValueError(f'First element of `weights_as_bytes` must be an array of bytes, '
                         f'not `{type(weights_as_bytes[0])}`!')
    if (not isinstance(weights_as_bytes[1], bytearray)) and (not isinstance(weights_as_bytes[1], bytes)):
        raise ValueError(f'Second element of `weights_as_bytes` must be an array of bytes, '
                         f'not `{type(weights_as_bytes[1])}`!')
    tmp_weights_name = self.get_temp_name()
    try:
        K.clear_session()
        encoder_inputs = Input(shape=(None, len(self.input_token_index_)), name='EncoderInputs')
        encoder_mask = Masking(name='EncoderMask', mask_value=0.0)(encoder_inputs)
        encoder = LSTM(
            self.latent_dim,
            return_sequences=False, return_state=True,
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
            name='EncoderLSTM'
        )
        encoder_outputs, state_h, state_c = encoder(encoder_mask)
        encoder_states = [state_h, state_c]
        decoder_inputs = Input(shape=(None, len(self.target_token_index_)), name='DecoderInputs')
        decoder_mask = Masking(name='DecoderMask', mask_value=0.0)(decoder_inputs)
        decoder_lstm = LSTM(
            self.latent_dim,
            return_sequences=True, return_state=True,
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
            name='DecoderLSTM'
        )
        decoder_outputs, _, _ = decoder_lstm(decoder_mask, initial_state=encoder_states)
        decoder_dense = Dense(
            len(self.target_token_index_), activation='softmax',
            kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
            name='DecoderOutput'
        )
        self.encoder_model_ = Model(encoder_inputs, encoder_states)
        decoder_state_input_h = Input(shape=(self.latent_dim,))
        decoder_state_input_c = Input(shape=(self.latent_dim,))
        decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
        decoder_outputs, state_h, state_c = decoder_lstm(
            decoder_mask, initial_state=decoder_states_inputs)
        decoder_states = [state_h, state_c]
        decoder_outputs = decoder_dense(decoder_outputs)
        self.decoder_model_ = Model(
            [decoder_inputs] + decoder_states_inputs,
            [decoder_outputs] + decoder_states)
        with open(tmp_weights_name, 'wb') as fp:
            fp.write(weights_as_bytes[0])
        self.encoder_model_.load_weights(tmp_weights_name)
        os.remove(tmp_weights_name)
        with open(tmp_weights_name, 'wb') as fp:
            fp.write(weights_as_bytes[1])
        self.decoder_model_.load_weights(tmp_weights_name)
        os.remove(tmp_weights_name)
    finally:
        if os.path.isfile(tmp_weights_name):
            os.remove(tmp_weights_name)
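# A minimal usage sketch (added; the file names and the `seq2seq` instance are
# assumptions, not from the source): restore a model from two weight blobs
# previously dumped to disk.
with open('encoder_weights.bin', 'rb') as fp:
    encoder_bytes = fp.read()
with open('decoder_weights.bin', 'rb') as fp:
    decoder_bytes = fp.read()
seq2seq.load_weights((encoder_bytes, decoder_bytes))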
def fit(self, X, y, **kwargs):
    """ Fit the seq2seq model to convert sequences one to another.

    Each sequence is unicode text composed of tokens separated by spaces.

    The Rectified Adam with Lookahead algorithm is used for training. To
    avoid overfitting, you should use an early stopping criterion. This
    criterion is enabled automatically if an evaluation set is defined.
    You can define it in one of two ways:

    1) set the `validation_split` parameter of this object, in which case the
    evaluation set is split off the training set proportionally to the
    `validation_split` value;

    2) pass an `eval_set` argument to this method, in which case the
    evaluation set is defined entirely by that argument.

    :param X: input texts for training.
    :param y: target texts for training.
    :param eval_set: optional argument containing input and target texts
        for evaluation during early stopping.

    :return: self
    """
    self.check_params(**self.get_params(deep=False))
    self.check_X(X, 'X')
    self.check_X(y, 'y')
    if len(X) != len(y):
        raise ValueError(f'`X` does not correspond to `y`! {len(X)} != {len(y)}.')
    if 'eval_set' in kwargs:
        if (not isinstance(kwargs['eval_set'], tuple)) and (not isinstance(kwargs['eval_set'], list)):
            raise ValueError(f'`eval_set` must be `{type((1, 2))}` or `{type([1, 2])}`, '
                             f'not `{type(kwargs["eval_set"])}`!')
        if len(kwargs['eval_set']) != 2:
            raise ValueError(f'`eval_set` must be a two-element sequence! '
                             f'{len(kwargs["eval_set"])} != 2')
        self.check_X(kwargs['eval_set'][0], 'X_eval_set')
        self.check_X(kwargs['eval_set'][1], 'y_eval_set')
        if len(kwargs['eval_set'][0]) != len(kwargs['eval_set'][1]):
            raise ValueError(f'`X_eval_set` does not correspond to `y_eval_set`! '
                             f'{len(kwargs["eval_set"][0])} != {len(kwargs["eval_set"][1])}.')
        X_eval_set = kwargs['eval_set'][0]
        y_eval_set = kwargs['eval_set'][1]
    else:
        if self.validation_split is None:
            X_eval_set = None
            y_eval_set = None
        else:
            n_eval_set = int(round(len(X) * self.validation_split))
            if n_eval_set < 1:
                raise ValueError('`validation_split` is too small! There are no samples for evaluation!')
            if n_eval_set >= len(X):
                raise ValueError('`validation_split` is too large! There are no samples for training!')
            X_eval_set = X[-n_eval_set:-1]
            y_eval_set = y[-n_eval_set:-1]
            X = X[:-n_eval_set]
            y = y[:-n_eval_set]
    input_characters = set()
    target_characters = set()
    max_encoder_seq_length = 0
    max_decoder_seq_length = 0
    for sample_ind in range(len(X)):
        prep = self.tokenize_text(X[sample_ind], self.lowercase)
        n = len(prep)
        if n == 0:
            raise ValueError(f'Sample {sample_ind} of `X` is wrong! This sample is empty.')
        if n > max_encoder_seq_length:
            max_encoder_seq_length = n
        input_characters |= set(prep)
        prep = self.tokenize_text(y[sample_ind], self.lowercase)
        n = len(prep)
        if n == 0:
            raise ValueError(f'Sample {sample_ind} of `y` is wrong! This sample is empty.')
        if (n + 2) > max_decoder_seq_length:
            max_decoder_seq_length = n + 2
        target_characters |= set(prep)
    if len(input_characters) == 0:
        raise ValueError('`X` is empty!')
    if len(target_characters) == 0:
        raise ValueError('`y` is empty!')
    input_characters_ = set()
    target_characters_ = set()
    if (X_eval_set is not None) and (y_eval_set is not None):
        for sample_ind in range(len(X_eval_set)):
            prep = self.tokenize_text(X_eval_set[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `X_eval_set` is wrong! This sample is empty.')
            if n > max_encoder_seq_length:
                max_encoder_seq_length = n
            input_characters_ |= set(prep)
            prep = self.tokenize_text(y_eval_set[sample_ind], self.lowercase)
            n = len(prep)
            if n == 0:
                raise ValueError(f'Sample {sample_ind} of `y_eval_set` is wrong! This sample is empty.')
            if (n + 2) > max_decoder_seq_length:
                max_decoder_seq_length = n + 2
            target_characters_ |= set(prep)
        if len(input_characters_) == 0:
            raise ValueError('`X_eval_set` is empty!')
        if len(target_characters_) == 0:
            raise ValueError('`y_eval_set` is empty!')
    input_characters = sorted(list(input_characters | input_characters_))
    target_characters = sorted(list(target_characters | target_characters_ | {'\t', '\n'}))
    if self.verbose:
        print('')
        print(f'Number of samples for training: {len(X)}.')
        if X_eval_set is not None:
            print(f'Number of samples for evaluation and early stopping: {len(X_eval_set)}.')
        print(f'Number of unique input tokens: {len(input_characters)}.')
        print(f'Number of unique output tokens: {len(target_characters)}.')
        print(f'Max sequence length for inputs: {max_encoder_seq_length}.')
        print(f'Max sequence length for outputs: {max_decoder_seq_length}.')
        print('')
    self.input_token_index_ = dict([(char, i) for i, char in enumerate(input_characters)])
    self.target_token_index_ = dict([(char, i) for i, char in enumerate(target_characters)])
    self.max_encoder_seq_length_ = max_encoder_seq_length
    self.max_decoder_seq_length_ = max_decoder_seq_length
    K.clear_session()
    encoder_inputs = Input(shape=(None, len(self.input_token_index_)), name='EncoderInputs')
    encoder_mask = Masking(name='EncoderMask', mask_value=0.0)(encoder_inputs)
    encoder = LSTM(
        self.latent_dim,
        return_sequences=False, return_state=True,
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
        name='EncoderLSTM'
    )
    encoder_outputs, state_h, state_c = encoder(encoder_mask)
    encoder_states = [state_h, state_c]
    decoder_inputs = Input(shape=(None, len(self.target_token_index_)), name='DecoderInputs')
    decoder_mask = Masking(name='DecoderMask', mask_value=0.0)(decoder_inputs)
    decoder_lstm = LSTM(
        self.latent_dim,
        return_sequences=True, return_state=True,
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        recurrent_initializer=Orthogonal(seed=self.generate_random_seed()),
        name='DecoderLSTM'
    )
    decoder_outputs, _, _ = decoder_lstm(decoder_mask, initial_state=encoder_states)
    decoder_dense = Dense(
        len(self.target_token_index_), activation='softmax',
        kernel_initializer=GlorotUniform(seed=self.generate_random_seed()),
        name='DecoderOutput'
    )
    decoder_outputs = decoder_dense(decoder_outputs)
    model = Model([encoder_inputs, decoder_inputs], decoder_outputs, name='Seq2SeqModel')
    radam = RectifiedAdam(learning_rate=self.lr, weight_decay=self.weight_decay)
    optimizer = Lookahead(radam, sync_period=6, slow_step_size=0.5)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy')
    if self.verbose:
        model.summary(positions=[0.23, 0.77, 0.85, 1.0])
        print('')
    training_set_generator = TextPairSequence(
        input_texts=X, target_texts=y,
        batch_size=self.batch_size,
        max_encoder_seq_length=max_encoder_seq_length,
        max_decoder_seq_length=max_decoder_seq_length,
        input_token_index=self.input_token_index_,
        target_token_index=self.target_token_index_,
        lowercase=self.lowercase
    )
    if (X_eval_set is not None) and (y_eval_set is not None):
        evaluation_set_generator = TextPairSequence(
            input_texts=X_eval_set, target_texts=y_eval_set,
            batch_size=self.batch_size,
            max_encoder_seq_length=max_encoder_seq_length,
            max_decoder_seq_length=max_decoder_seq_length,
            input_token_index=self.input_token_index_,
            target_token_index=self.target_token_index_,
            lowercase=self.lowercase
        )
        callbacks = [
            EarlyStopping(patience=5, verbose=(1 if self.verbose else 0), monitor='val_loss')
        ]
    else:
        evaluation_set_generator = None
        callbacks = []
    tmp_weights_name = self.get_temp_name()
    try:
        callbacks.append(
            ModelCheckpoint(filepath=tmp_weights_name, verbose=(1 if self.verbose else 0),
                            save_best_only=True, save_weights_only=True,
                            monitor='loss' if evaluation_set_generator is None else 'val_loss')
        )
        model.fit_generator(
            generator=training_set_generator,
            epochs=self.epochs, verbose=(1 if self.verbose else 0),
            shuffle=True,
            validation_data=evaluation_set_generator,
            callbacks=callbacks
        )
        if os.path.isfile(tmp_weights_name):
            model.load_weights(tmp_weights_name)
    finally:
        if os.path.isfile(tmp_weights_name):
            os.remove(tmp_weights_name)
    self.encoder_model_ = Model(encoder_inputs, encoder_states)
    decoder_state_input_h = Input(shape=(self.latent_dim,))
    decoder_state_input_c = Input(shape=(self.latent_dim,))
    decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
    decoder_outputs, state_h, state_c = decoder_lstm(
        decoder_mask, initial_state=decoder_states_inputs)
    decoder_states = [state_h, state_c]
    decoder_outputs = decoder_dense(decoder_outputs)
    self.decoder_model_ = Model(
        [decoder_inputs] + decoder_states_inputs,
        [decoder_outputs] + decoder_states)
    self.reverse_target_char_index_ = dict(
        (i, char) for char, i in self.target_token_index_.items())
    return self
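# A minimal usage sketch (added; the class name Seq2SeqLSTM and its
# constructor arguments are assumptions, not from the source): passing
# `eval_set` enables the early-stopping path described in the docstring.
X_train = ['a b c', 'b c d e']
y_train = ['c b a', 'e d c b']
seq2seq = Seq2SeqLSTM(latent_dim=128, epochs=10, batch_size=2)
seq2seq.fit(X_train, y_train, eval_set=(['c d'], ['d c']))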
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.initializers import Constant, Orthogonal
from tensorflow.keras.regularizers import l2

orth_weights_initializer = lambda scale: Orthogonal(gain=scale)
const_bias_initializer = lambda value: Constant(value=value)
l2_regularizer = lambda base_val=1.0: l2(l=base_val)


# BN + dropout only for offline algorithms, see Liu et al. Feb. 2020
def _mlp_actor_net_orth(hidden_layers=[64, 64]):
    def network_func(input_dim, output_dim):
        state = Input(shape=input_dim)
        x = state
        for i in range(len(hidden_layers)):
            x = Dense(hidden_layers[i], activation='tanh',
                      kernel_initializer=orth_weights_initializer(np.sqrt(2)),
                      bias_initializer=const_bias_initializer(0.0),
                      kernel_regularizer=l2_regularizer())(x)
        action_mu = Dense(output_dim, activation='tanh',
                          kernel_initializer=orth_weights_initializer(0.01),
    Dense(units=64, kernel_initializer=HeUniform(), bias_initializer=Ones(),
          activation=relu)
])
model.summary()

model.add(Dense(units=64,
                kernel_initializer=RandomNormal(mean=0., stddev=0.005),
                bias_initializer=Constant(value=0.03),
                activation=elu,
                name='rand_norm_0.0005'))
model.add(Dense(units=8,
                kernel_initializer=Orthogonal(gain=0.9),
                bias_initializer=Constant(value=0.04),
                activation=selu))
model.summary()


def my_custom_initializer(shape, dtype=None):
    return K.random_normal(shape, dtype=dtype)


model.add(Dense(units=64, kernel_initializer=my_custom_initializer, activation=selu))
model.summary()

fig, axes = plt.subplots(nrows=5, ncols=2, figsize=(12, 16))
def __init__(self, eps_std=0.05, seed=None):
    self.eps_std = eps_std
    self.seed = seed
    self.orthogonal = Orthogonal()
def createModel(self):
    self.model_instance += 1
    clear_session()
    features, label = self.getDataset()
    X_train, y_train = self.createLag(features, label)
    X_train = X_train[:, self.lags]
    learning_rate = float(self.hyperparameters["Learning_Rate"].get())
    momentum = float(self.hyperparameters["Momentum"].get())
    optimizers = {
        "Adam": Adam(learning_rate=learning_rate),
        "SGD": SGD(learning_rate=learning_rate, momentum=momentum),
        "RMSprop": RMSprop(learning_rate=learning_rate, momentum=momentum)
    }
    shape = (X_train.shape[1], X_train.shape[2])
    model_choice = self.model_var.get()

    if not self.do_optimization:
        model = Sequential()
        model.add(Input(shape=shape))
        if model_choice == 0:
            model.add(Flatten())
        layers = self.no_optimization_choice_var.get()
        for i in range(layers):
            neuron_number = self.neuron_numbers_var[i].get()
            activation_function = self.activation_var[i].get()
            if model_choice == 0:
                model.add(Dense(neuron_number, activation=activation_function,
                                kernel_initializer=GlorotUniform(seed=0)))
            elif model_choice == 1:
                model.add(Conv1D(filters=neuron_number, kernel_size=2,
                                 activation=activation_function,
                                 kernel_initializer=GlorotUniform(seed=0)))
                model.add(MaxPooling1D(pool_size=2))
            else:
                # Recurrent variants (2: LSTM, 3: Bi-LSTM, 4: SimpleRNN, 5: GRU)
                # share one keyword set; only the last recurrent layer
                # collapses the sequence to a vector.
                rnn_kwargs = dict(activation=activation_function,
                                  return_sequences=(i != layers - 1),
                                  kernel_initializer=GlorotUniform(seed=0),
                                  recurrent_initializer=Orthogonal(seed=0))
                if model_choice == 2:
                    model.add(LSTM(neuron_number, **rnn_kwargs))
                elif model_choice == 3:
                    model.add(Bidirectional(LSTM(neuron_number, **rnn_kwargs)))
                elif model_choice == 4:
                    model.add(SimpleRNN(neuron_number, **rnn_kwargs))
                elif model_choice == 5:
                    model.add(GRU(neuron_number, **rnn_kwargs))
                model.add(Dropout(0.2))
        if model_choice == 1:
            model.add(Flatten())
            model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))
        model.add(Dense(1, activation=self.output_activation.get(),
                        kernel_initializer=GlorotUniform(seed=0)))
        model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                      loss=self.hyperparameters["Loss_Function"].get())
        history = model.fit(X_train, y_train,
                            epochs=self.hyperparameters["Epoch"].get(),
                            batch_size=self.hyperparameters["Batch_Size"].get(),
                            verbose=1, shuffle=False)
        loss = history.history["loss"][-1]
        self.train_loss.set(loss)

    elif self.do_optimization:
        layer = self.optimization_choice_var.get()
        if model_choice == 0:
            def build_model(hp):
                model = Sequential()
                model.add(Input(shape=shape))
                model.add(Flatten())
                for i in range(layer):
                    n_min = self.neuron_min_number_var[i].get()
                    n_max = self.neuron_max_number_var[i].get()
                    step = int((n_max - n_min) / 4)
                    model.add(Dense(units=hp.Int('MLP_' + str(i), min_value=n_min,
                                                 max_value=n_max, step=step),
                                    activation='relu'))
                model.add(Dense(1))
                model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                              loss=self.hyperparameters["Loss_Function"].get())
                return model
            name = str(self.model_instance) + ". MLP"
        elif model_choice == 1:
            def build_model(hp):
                model = Sequential()
                model.add(Input(shape=shape))
                for i in range(layer):
                    n_min = self.neuron_min_number_var[i].get()
                    n_max = self.neuron_max_number_var[i].get()
                    step = int((n_max - n_min) / 4)
                    model.add(Conv1D(filters=hp.Int("CNN_" + str(i), min_value=n_min,
                                                    max_value=n_max, step=step),
                                     kernel_size=2, activation="relu",
                                     kernel_initializer=GlorotUniform(seed=0)))
                    model.add(MaxPooling1D(pool_size=2))
                model.add(Flatten())
                model.add(Dense(32, kernel_initializer=GlorotUniform(seed=0)))
                model.add(Dense(1, kernel_initializer=GlorotUniform(seed=0)))
                model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                              loss=self.hyperparameters["Loss_Function"].get())
                return model
            name = str(self.model_instance) + ". CNN"
        elif model_choice == 2:
            def build_model(hp):
                model = Sequential()
                model.add(Input(shape=shape))
                for i in range(layer):
                    n_min = self.neuron_min_number_var[i].get()
                    n_max = self.neuron_max_number_var[i].get()
                    step = int((n_max - n_min) / 4)
                    model.add(LSTM(units=hp.Int("LSTM_" + str(i), min_value=n_min,
                                                max_value=n_max, step=step),
                                   activation='relu', return_sequences=True,
                                   kernel_initializer=GlorotUniform(seed=0)))
                    if i == layer - 1:
                        # As in the original: the final pass stacks one more
                        # LSTM that collapses the sequence to a vector.
                        model.add(LSTM(units=hp.Int("LSTM_" + str(i), min_value=n_min,
                                                    max_value=n_max, step=step),
                                       activation='relu', return_sequences=False,
                                       kernel_initializer=GlorotUniform(seed=0)))
                model.add(Dense(1))
                model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                              loss=self.hyperparameters["Loss_Function"].get())
                return model
            name = str(self.model_instance) + ". LSTM"
        elif model_choice == 3:
            def build_model(hp):
                model = Sequential()
                model.add(Input(shape=shape))
                for i in range(layer):
                    n_min = self.neuron_min_number_var[i].get()
                    n_max = self.neuron_max_number_var[i].get()
                    step = int((n_max - n_min) / 4)
                    model.add(Bidirectional(LSTM(units=hp.Int("LSTM_" + str(i),
                                                              min_value=n_min,
                                                              max_value=n_max, step=step),
                                                 activation='relu', return_sequences=True,
                                                 kernel_initializer=GlorotUniform(seed=0))))
                    if i == layer - 1:
                        model.add(Bidirectional(LSTM(units=hp.Int("LSTM_" + str(i),
                                                                  min_value=n_min,
                                                                  max_value=n_max, step=step),
                                                     activation='relu', return_sequences=False,
                                                     kernel_initializer=GlorotUniform(seed=0))))
                model.add(Dense(1, kernel_initializer=GlorotUniform(seed=0)))
                model.compile(optimizer=optimizers[self.hyperparameters["Optimizer"].get()],
                              loss=self.hyperparameters["Loss_Function"].get())
                return model
            name = str(self.model_instance) + ". Bi-LSTM"

        tuner = RandomSearch(build_model, objective='loss', max_trials=25,
                             executions_per_trial=2, directory=self.runtime,
                             project_name=name)
        tuner.search(X_train, y_train,
                     epochs=self.hyperparameters["Epoch"].get(),
                     batch_size=self.hyperparameters["Batch_Size"].get())
        hps = tuner.get_best_hyperparameters(num_trials=1)[0]
        model = tuner.hypermodel.build(hps)
        history = model.fit(X_train, y_train,
                            epochs=self.hyperparameters["Epoch"].get(),
                            batch_size=self.hyperparameters["Batch_Size"].get(),
                            verbose=1)
        loss = history.history["loss"][-1]
        self.train_loss.set(loss)
        for i in range(layer):
            if model_choice == 0:
                self.best_model_neurons[i].set(model.get_layer(index=i + 1).get_config()["units"])
            elif model_choice == 1:
                self.best_model_neurons[i].set(model.get_layer(index=(2 * i)).get_config()["filters"])
            elif model_choice == 2:
                self.best_model_neurons[i].set(model.get_layer(index=i).get_config()["units"])
            elif model_choice == 3:
                self.best_model_neurons[i].set(model.get_layer(index=i).get_config()["layer"]["config"]["units"])

    model.summary()
    self.model = model
import os

from arena5.algos.hppo.GAE import GAE
import tensorflow as tf
from stable_baselines.common.policies import MlpPolicy, CnnPolicy
from stable_baselines.common import tf_util, zipsame
from stable_baselines.common.distributions import DiagGaussianProbabilityDistribution
from tensorflow.keras.layers import Lambda, Input, LSTM, Dense, Reshape, Flatten, multiply, RepeatVector, Permute
from tensorflow.keras.initializers import Orthogonal
from stable_baselines.common import Dataset
from tensorflow.keras import backend as K
from stable_baselines.common.mpi_adam import MpiAdam
from tensorflow.keras.backend import set_session
from stable_baselines import logger

ORTHO_01 = Orthogonal(0.01)

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'


class HPPOPolicy():
    def __init__(self, env, policy_comm, **kwargs):
        # Pull params out of kwargs
        self.params = kwargs['params']

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
        config.log_device_placement = True  # log device placement (on which device the operation ran)
        sess = tf.Session(config=config)
        set_session(