def network(self): """ Assemble Critic network to predict q-values """ state = Input((self.env_dim)) x = Dense(32, activation='elu')(state) x = Dense(16, activation='elu')(x) out = Dense(1, activation='linear', kernel_initializer=RandomUniform())(x) return Model(state, out)
def create_network(self, S, G, num_A, dropout, l2reg):
    h = concatenate([S, G])
    for l in self.layers:
        h = Dense(l, activation="relu", kernel_initializer=he_normal())(h)
    Q_values = Dense(num_A,
                     activation='linear',
                     kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))(h)
    return Q_values
def model(self, input: Any) -> Layer:
    size = len(self.alphabet())
    initializer = RandomUniform(minval=-0.5, maxval=0.5)
    embedding = TimeDistributed(
        Embedding(size, 50, embeddings_initializer=initializer))(input)
    embedding = SpatialDropout1D(self.__droput)(embedding)
    output = TimeDistributed(Bidirectional(CuDNNLSTM(100)))(embedding)
    output = SpatialDropout1D(self.__droput)(output)
    return output
def __init__(self, env):
    np.random.seed(123)
    env.seed(123)
    assert len(env.action_space.shape) == 1
    nb_actions = env.action_space.shape[0]

    # Next, we build a very simple model.
    self.actor = Sequential()
    self.actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
    self.actor.add(Dense(16))
    self.actor.add(Activation('relu'))
    self.actor.add(Dense(16))
    self.actor.add(Activation('relu'))
    self.actor.add(Dense(16))
    self.actor.add(Activation('relu'))
    self.actor.add(
        Dense(nb_actions, activation='tanh', kernel_initializer=RandomUniform()))
    self.actor.add(Lambda(lambda x: x * 60.0))
    print(self.actor.summary())

    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=(1,) + env.observation_space.shape,
                              name='observation_input')
    flattened_observation = Flatten()(observation_input)
    x = Concatenate()([action_input, flattened_observation])
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(32)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)
    x = Activation('linear')(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    # Finally, we configure and compile our agent. You can use every built-in
    # Keras optimizer and even the metrics!
    memory = SequentialMemory(limit=100000, window_length=1)
    random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
    self.agent = DDPGAgent(nb_actions=nb_actions,
                           actor=self.actor,
                           critic=critic,
                           critic_action_input=action_input,
                           memory=memory,
                           nb_steps_warmup_critic=100,
                           nb_steps_warmup_actor=100,
                           random_process=random_process,
                           gamma=.99,
                           target_model_update=1e-3)
    self.agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
def build(self, input_shape):
    initializer_uniform = RandomUniform(minval=0, maxval=1)
    constraint_min_max = min_max_norm(min_value=0.0, max_value=1.0)
    self.b = self.add_weight(name='b',
                             shape=(input_shape[-1],),
                             initializer=initializer_uniform,
                             constraint=constraint_min_max,
                             trainable=True)
    super(ANDNoisy, self).build(input_shape)
def __init__(self, output_dim, initializer=None, betas=1.0, **kwargs):
    self.output_dim = output_dim
    self.init_betas = betas
    if not initializer:
        self.initializer = RandomUniform(0.0, 1.0)
        # self.initializer = Orthogonal()
    else:
        self.initializer = initializer
    super(RBFLayer, self).__init__(**kwargs)
def _add_rnn_layer(self, rnn, return_sequences, x):
    if not self.params["rnn_bidirectional"][x]:
        self.cnn = rnn(
            units=self.params["rnn_units"][x],
            dropout=self.params["rnn_dropout_input"][x],
            recurrent_dropout=self.params["rnn_dropout_recurrent"][x],
            kernel_initializer=RandomUniform(),
            kernel_constraint=max_norm(self.params["kernel_constraint"]),
            return_sequences=return_sequences)(self.cnn)
    else:
        self.cnn = Bidirectional(
            rnn(units=self.params["rnn_units"][x],
                dropout=self.params["rnn_dropout_input"][x],
                recurrent_dropout=self.params["rnn_dropout_recurrent"][x],
                kernel_initializer=RandomUniform(),
                kernel_constraint=max_norm(self.params["kernel_constraint"]),
                return_sequences=return_sequences))(self.cnn)
def get_fI(STATE_SIZE, ACTION_COUNT):
    model = Sequential(name='f_I')
    model.add(
        Dense(2048,
              activation="relu",
              input_shape=(STATE_SIZE * 2,),
              kernel_initializer=RandomUniform(-0.1, 0.1)))
    model.add(Dense(ACTION_COUNT))
    return model
def get_fM(STATE_SIZE, ACTION_COUNT, AGENT_HISTORY_LENGTH):
    state = Input(shape=(STATE_SIZE,), name='current_state')
    actions = Input(shape=(ACTION_COUNT * AGENT_HISTORY_LENGTH,),
                    name='action_performed')
    x = Dense(2048, kernel_initializer=RandomUniform(-0.1, 0.1))(actions)
    x = Multiply()([x, state])
    next_state = Dense(2048, name='next_state')(x)
    model = Model(inputs=[state, actions], outputs=[next_state], name='f_M')
    return model
def _model(
        self,
        *,
        embedding_dim=2,
        embedding_max_size=10000,
        forecaster_features=1,
        forecaster_hidden_units=(8, 8),
        lr=0.1
):
    # Embedder
    embedding_initialize = RandomUniform(minval=-1, maxval=1)
    customer_in = Input((1,))
    embedding_out = Embedding(input_dim=embedding_max_size,
                              output_dim=embedding_dim,
                              input_length=1,
                              embeddings_initializer=embedding_initialize
                              )(customer_in)
    embedding_out = Flatten()(embedding_out)
    embedding_model = Model(
        customer_in,
        embedding_out,
        name='Embedder'
    )

    # Forecaster
    features_in = Input((forecaster_features,))
    embedding_in = Input((embedding_dim,))
    forecaster_output = Concatenate()([features_in, embedding_in])
    # append final output
    forecaster_dense_units = list(forecaster_hidden_units) + [1]
    for idx, units in enumerate(forecaster_dense_units):
        forecaster_output = Dense(
            units=units,
            activation='relu',
            name='forecaster_dense_{}'.format(idx)
        )(forecaster_output)
    forecaster_model = Model(
        [features_in, embedding_in],
        forecaster_output,
        name='Forecaster'
    )

    # Combined model
    combined_output = forecaster_model(
        [features_in, embedding_model(customer_in)]
    )
    combined_model = Model(
        [features_in, customer_in],
        combined_output,
        name='Combined'
    )
    optimizer = Adam(lr=lr)
    combined_model.compile(optimizer=optimizer, loss='mse')

    return {
        'forecaster': forecaster_model,
        'embedder': embedding_model,
        'combined': combined_model
    }
def create_model(self, critic_lr, channels, kernels, strides, activations,
                 linear_units, units, low_dimensional):
    initializer = RandomUniform(-3e-4, 3e-4)
    self.obs = tf.placeholder(tf.float32, (None,) + self.obs_shape)
    self.action = tf.placeholder(tf.float32, (None,) + self.action_shape)
    obs = Input(tensor=self.obs)
    action = Input(tensor=self.action)
    i = 0
    x = obs
    if low_dimensional:
        for u in units:
            x = Dense(u,
                      activation='relu',
                      kernel_initializer=initializer,
                      bias_initializer=initializer)(x)
        y = Dense(units[-1],
                  activation='relu',
                  kernel_initializer=initializer,
                  bias_initializer=initializer)(action)
        x = Add()([x, y])
    else:
        for c, k, s, a in zip(channels, kernels, strides, activations):
            x = Conv2D(c, k,
                       strides=(s, s),
                       activation=a,
                       kernel_initializer=initializer,
                       bias_initializer=initializer)(x)
        x = Flatten()(x)
        # this to output layer
        x = Dense(linear_units,
                  activation='relu',
                  kernel_initializer=initializer,
                  bias_initializer=initializer)(x)
        y = Dense(linear_units,
                  activation='relu',
                  kernel_initializer=initializer,
                  bias_initializer=initializer)(action)
        x = Add()([x, y])
    output = Dense(1,
                   kernel_initializer=initializer,
                   bias_initializer=initializer)(x)
    self.output = output
    self.model = Model(inputs=[obs, action], outputs=output)
    self.gradients = tf.gradients(self.output, self.action)
    self.q_targets = tf.placeholder(tf.float32, (None, 1))
    self.loss = tf.reduce_mean(
        tf.squared_difference(self.output, self.q_targets))
    self.optimizer = tf.train.AdamOptimizer(critic_lr)
    self.params = tf.trainable_variables(scope='critic')
    grads = self.optimizer.compute_gradients(self.loss)
    clipped_grads = [(tf.clip_by_value(grad, -1.0, 1.0), var)
                     for grad, var in grads]
    self.optimize = self.optimizer.apply_gradients(clipped_grads)
def get_model(self):
    word_embeddings = self.word_embeddings
    case_embeddings = self.case_embeddings
    char_idx = self.char_idx
    label_idx = self.label_idx

    words_input = Input(shape=(None,), dtype='int32', name='words_input')
    words = Embedding(input_dim=word_embeddings.shape[0],
                      output_dim=word_embeddings.shape[1],
                      weights=[word_embeddings],
                      trainable=False)(words_input)
    casing_input = Input(shape=(None,), dtype='int32', name='casing_input')
    casing = Embedding(output_dim=case_embeddings.shape[1],
                       input_dim=case_embeddings.shape[0],
                       weights=[case_embeddings],
                       trainable=False)(casing_input)
    character_input = Input(shape=(None, 52,), name='char_input')
    embed_char_out = TimeDistributed(
        Embedding(len(char_idx),
                  30,
                  embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)),
        name='char_embedding')(character_input)
    dropout = Dropout(0.5)(embed_char_out)
    conv1d_out = TimeDistributed(
        Conv1D(kernel_size=3,
               filters=30,
               padding='same',
               activation='tanh',
               strides=1))(dropout)
    maxpool_out = TimeDistributed(MaxPooling1D(52))(conv1d_out)
    char = TimeDistributed(Flatten())(maxpool_out)
    char = Dropout(0.5)(char)
    output = concatenate([words, casing, char])
    output = Bidirectional(
        LSTM(200,
             return_sequences=True,
             dropout=0.50,
             recurrent_dropout=0.25))(output)
    output = TimeDistributed(Dense(len(label_idx), activation='softmax'))(output)

    model = Model(inputs=[words_input, casing_input, character_input],
                  outputs=[output])
    model.compile(loss='sparse_categorical_crossentropy', optimizer='nadam')
    model.summary()
    return model
def best_learning_rate(train_x, train_y, test_x, test_y, lr, batch_size=256):
    """
    Fit the model 10 times from random initializations at a given learning rate.

    The model is initialized randomly (random_uniform), compiled and fitted;
    the operation is repeated 10 times and all training histories are
    returned as a list.

    Parameters
    ----------
    train_x : np.array(float)
        Training `x` set (training features).
    train_y : np.array(int)
        Training `y` set (training labels).
    test_x : np.array(float)
        Test `x` set (test features).
    test_y : np.array(int)
        Test `y` set (test labels).
    lr : float
        Learning rate.
    batch_size : int, optional
        Size of batch (amount of samples) for model fitting.

    Returns
    -------
    history_set : list(keras.callbacks.History object)
        History of loss, accuracy, validation loss and validation accuracy
        during model fitting.
    """
    optimizer = TFOptimizer(tf.train.GradientDescentOptimizer(lr))
    history_set = []
    for i in range(10):
        model = Sequential()
        initializer = RandomUniform(minval=-1.0, maxval=1.0, seed=None)
        model.add(
            Dense(1,
                  kernel_initializer=initializer,
                  bias_initializer=initializer,
                  input_dim=train_x.shape[1],
                  activation='sigmoid'))
        model.compile(loss='binary_crossentropy',
                      optimizer=optimizer,
                      metrics=['accuracy'])
        history = model.fit(train_x,
                            train_y,
                            epochs=1000,
                            validation_data=(test_x, test_y),
                            batch_size=batch_size,
                            verbose=0)
        history_set.append(history)
    return history_set
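
# A minimal usage sketch for best_learning_rate() above, assuming a binary-
# classification dataset; the array shapes and the 0.01 learning rate are
# illustrative assumptions, not taken from the source.
import numpy as np

train_x = np.random.rand(1000, 4).astype(np.float32)   # hypothetical features
train_y = np.random.randint(0, 2, size=(1000, 1))      # hypothetical 0/1 labels
test_x = np.random.rand(200, 4).astype(np.float32)
test_y = np.random.randint(0, 2, size=(200, 1))

# Ten independent runs at a single learning rate; comparing the mean final
# validation loss across candidate rates is one way to pick the best one.
histories = best_learning_rate(train_x, train_y, test_x, test_y, lr=0.01)
mean_val_loss = np.mean([h.history['val_loss'][-1] for h in histories])
print('mean final val_loss: %.4f' % mean_val_loss)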
def __init__(self,
             learning_rate=None,
             vocab_size=None,
             embedding_size=None,
             rnn_output_size=None,
             dropout_rate=None,
             bidirectional_rnn=None,
             rnn_type=None,
             rnn_layers=None,
             l1_reg=None,
             l2_reg=None,
             initializer=None,
             word_vector_init=None):
    """
    If an arg is None, it will get its value from config.active_config.
    """
    self._learning_rate = learning_rate or active_config().learning_rate
    self._vocab_size = vocab_size or active_config().vocab_size
    self._embedding_size = embedding_size or active_config().embedding_size
    self._rnn_output_size = (rnn_output_size or
                             active_config().rnn_output_size)
    self._dropout_rate = dropout_rate or active_config().dropout_rate
    self._rnn_type = rnn_type or active_config().rnn_type
    self._rnn_layers = rnn_layers or active_config().rnn_layers
    self._word_vector_init = (word_vector_init or
                              active_config().word_vector_init)

    self._initializer = initializer or active_config().initializer
    if self._initializer == 'vinyals_uniform':
        self._initializer = RandomUniform(-0.08, 0.08)

    if bidirectional_rnn is None:
        self._bidirectional_rnn = active_config().bidirectional_rnn
    else:
        self._bidirectional_rnn = bidirectional_rnn

    l1_reg = l1_reg or active_config().l1_reg
    l2_reg = l2_reg or active_config().l2_reg
    self._regularizer = l1_l2(l1_reg, l2_reg)

    self._keras_model = None

    if self._vocab_size is None:
        raise ValueError('config.active_config().vocab_size cannot be '
                         'None! You should check your config or you can '
                         'explicitly pass the vocab_size argument.')
    if self._rnn_type not in ('lstm', 'gru'):
        raise ValueError('rnn_type must be either "lstm" or "gru"!')
    if self._rnn_layers < 1:
        raise ValueError('rnn_layers must be >= 1!')
    if self._word_vector_init is not None and self._embedding_size != 300:
        raise ValueError('If word_vector_init is not None, embedding_size '
                         'must be 300')
def test_name_entity_recognition(self):
    K.clear_session()
    words_input = Input(shape=(None,), dtype='int32', name='words_input')
    words = Embedding(input_dim=10, output_dim=20,
                      weights=None, trainable=False)(words_input)
    casing_input = Input(shape=(None,), dtype='int32', name='casing_input')
    casing = Embedding(output_dim=20, input_dim=12,
                       weights=None, trainable=False)(casing_input)
    character_input = Input(shape=(None, 52,), name='char_input')
    embed_char_out = TimeDistributed(
        Embedding(26, 20,
                  embeddings_initializer=RandomUniform(minval=-0.5, maxval=0.5)),
        name='char_embedding')(character_input)
    dropout = Dropout(0.5)(embed_char_out)
    conv1d_out = TimeDistributed(
        Conv1D(kernel_size=3,
               filters=30,
               padding='same',
               activation='tanh',
               strides=1))(dropout)
    maxpool_out = TimeDistributed(MaxPooling1D(52))(conv1d_out)
    char = TimeDistributed(Flatten())(maxpool_out)
    char = Dropout(0.5)(char)
    output = concatenate([words, casing, char])
    output = Bidirectional(
        LSTM(200,
             return_sequences=True,
             dropout=0.50,
             recurrent_dropout=0.25))(output)
    output = TimeDistributed(Dense(35, activation='softmax'))(output)
    keras_model = Model(
        inputs=[words_input, casing_input, character_input],
        outputs=[output])

    data1 = np.random.rand(2, 6).astype(np.float32)
    data2 = np.random.rand(2, 6).astype(np.float32)
    data3 = np.random.rand(2, 6, 52).astype(np.float32)
    expected = keras_model.predict([data1, data2, data3])
    onnx_model = keras2onnx.convert_keras(keras_model, keras_model.name)
    self.assertTrue(
        run_keras_and_ort(onnx_model.graph.name, onnx_model, keras_model,
                          [data1, data2, data3], expected, self.model_files,
                          compare_perf=True))
def Minecraft_DDPG(window_length, grayscale, width, height, nb_actions):
    assert width == 32 and height == 32, 'Model accepts 32x32 input size'
    if grayscale:
        channels = 1
    else:
        channels = 3
    if K.image_data_format() == 'channels_last':
        observation_shape = (32, 32, window_length * channels)
    else:
        observation_shape = (window_length * channels, 32, 32)

    # Build actor and critic networks
    inputs = Input(shape=observation_shape)
    x = Conv2D(32, (4, 4), strides=(2, 2), activation='relu')(inputs)
    x = Conv2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(256, activation='relu')(x)
    x = Dense(nb_actions,
              activation='tanh',
              kernel_initializer=RandomUniform(-3e-4, 3e-4))(x)
    actor = Model(inputs=inputs, outputs=x)
    print(actor.summary())

    # The critic network has 2 inputs: one action input and one observation input.
    action_input = Input(shape=(nb_actions,), name='action_input')
    observation_input = Input(shape=observation_shape, name='observation_input')
    x = Conv2D(32, (4, 4), strides=(2, 2), activation='relu')(observation_input)
    x = Conv2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = concatenate([x, action_input])  # Actions are not included until the 2nd dense layer
    x = Dense(256, activation='relu')(x)
    x = Dense(1,
              activation='linear',
              kernel_initializer=RandomUniform(-3e-4, 3e-4))(x)
    critic = Model(inputs=[action_input, observation_input], outputs=x)
    print(critic.summary())

    return actor, critic, action_input
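
# A minimal usage sketch wiring the returned actor/critic into a keras-rl
# DDPGAgent, mirroring the DDPG __init__ snippet earlier in this section;
# the window length, action count and hyperparameters are illustrative
# assumptions, not taken from the source.
actor, critic, action_input = Minecraft_DDPG(window_length=4, grayscale=True,
                                             width=32, height=32, nb_actions=3)
memory = SequentialMemory(limit=100000, window_length=4)
random_process = OrnsteinUhlenbeckProcess(size=3, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=3, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])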
def create_critic_network(self, S, G, M):
    if self.network == '0':
        L1 = concatenate([multiply([subtract([S, G]), M]), S])
        L2 = Dense(400,
                   activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))(L1)
        L3 = Dense(300,
                   activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))(L2)
        Q_values = Dense(self.env.action_dim,
                         activation='linear',
                         kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
                         kernel_regularizer=l2(0.01),
                         bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))(L3)
    else:
        L1 = Dense(200,
                   activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))
        L2 = Dense(300,
                   activation="relu",
                   kernel_initializer=lecun_uniform(),
                   kernel_regularizer=l2(0.01))
        i1 = multiply([subtract([S, G]), M])
        i2 = S
        h1 = L1(i1)
        h2 = L1(i2)
        h3 = concatenate([h1, h2])
        h4 = L2(h3)
        Q_values = Dense(self.env.action_dim,
                         activation='linear',
                         kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
                         kernel_regularizer=l2(0.01),
                         bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))(h4)
    return Q_values
def generator_model(noise_dim, feature_dim):
    noise_input = keras.layers.Input(shape=(noise_dim,))
    eeg_input = keras.layers.Input(shape=(feature_dim,))

    x = MoGLayer(kernel_initializer=RandomUniform(minval=-0.2, maxval=0.2),
                 bias_initializer=RandomUniform(minval=-1.0, maxval=1.0),
                 kernel_regularizer=l2(0.01))(noise_input)
    x = keras.layers.concatenate([x, eeg_input])
    x = Dense(1024, activation="tanh")(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Dense(128 * 7 * 7, activation="tanh")(x)
    x = Reshape((7, 7, 128))(x)
    x = UpSampling2D()(x)
    x = BatchNormalization(momentum=0.8)(x)
    x = Conv2D(64, kernel_size=5, padding="same", activation="tanh")(x)
    x = UpSampling2D()(x)
    x = Conv2D(1, kernel_size=3, padding="same")(x)
    output = Activation("tanh")(x)

    return Model(inputs=[noise_input, eeg_input], outputs=[output])
def network(self):
    state = Input((self.state_dim,))
    action = Input((self.action_dim,))
    x = Dense(800, activation='relu')(state)
    # x = concatenate([Flatten()(x), action])
    x = concatenate([x, action])
    x = Dense(500, activation='relu')(x)
    out = Dense(1, activation='linear', kernel_initializer=RandomUniform())(x)
    return Model([state, action], out)
def network(self): """ Assemble Critic network to predict q-values """ state = Input(shape=[self.env_dim]) action = Input(shape=[self.act_dim]) x = Dense(256, activation='relu')(state) x = concatenate([x, action]) x = Dense(256, activation='relu')(x) out = Dense(1, activation='linear', kernel_initializer=RandomUniform())(x) return Model([state, action], out)
def build(self, shape_input):
    M = shape_input[1] - 1
    self.I = k_back.eye(M)
    init_mu = RandomUniform(minval=0.01, maxval=10)
    init_pfd = RandomUniform(minval=0.01, maxval=10)
    self.mu = self.add_weight('mu',
                              shape=(M, 1),
                              initializer=init_mu,
                              constraint=NonNeg())
    data_p = self.add_weight('data_p',
                             shape=(M, M - 1),
                             initializer=init_pfd,
                             constraint=NonNeg())
    data_p_scaled = data_p / k_back.sum(data_p, axis=1, keepdims=True)
    self.P = k_back.reshape(
        k_back.flatten(data_p_scaled)[None, :] @ k_back.one_hot(
            [j for j in range(M * M) if j % (M + 1) != 0], M * M),
        (M, M))
    self.odot = (self.P - self.I) * self.mu
    self.is_built = True
def create_critic_network(self, S, G=None, M=None):
    input = concatenate([multiply([subtract([S, G]), M]), S])
    L1 = Dense(400,
               activation="relu",
               kernel_initializer=lecun_uniform(),
               kernel_regularizer=l2(0.01))
    L1out = L1(input)
    L2 = Dense(300,
               activation="relu",
               kernel_initializer=lecun_uniform(),
               kernel_regularizer=l2(0.01))
    L2out = L2(L1out)
    L3 = Dense(self.env.action_dim,
               activation='linear',
               kernel_initializer=RandomUniform(minval=-3e-4, maxval=3e-4),
               kernel_regularizer=l2(0.01),
               bias_initializer=RandomUniform(minval=-3e-4, maxval=3e-4))
    Q_values = L3(L2out)
    return [L1, L2, L3], Q_values
def network(self):
    state = Input((self.env_dim,))
    action = Input((self.act_dim,))
    x = Dense(128, activation='relu')(state)
    x = concatenate([x, action])
    x = Dense(256, activation='relu')(x)
    out = Dense(self.env_dim, activation='linear', kernel_initializer=RandomUniform())(x)
    return Model([state, action], out)
def build(self):
    phi = Input(name='Xs', shape=(21,))
    # Learning to Rank
    out_ = Dense(1,
                 kernel_initializer=RandomUniform(minval=-0.014, maxval=0.014),
                 bias_initializer='zeros')(phi)
    model = Model(inputs=[phi], outputs=[out_])
    return model
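
# A minimal usage sketch for the learning-to-rank scorer above, assuming
# `ranker` is an instance of the class defining build(); the 21-dimensional
# feature matrix, the pointwise MSE training setup and the relevance targets
# are illustrative assumptions, not taken from the source.
import numpy as np

model = ranker.build()
model.compile(optimizer='sgd', loss='mse')             # hypothetical training setup
features = np.random.rand(100, 21).astype('float32')   # 100 items, 21 features each
relevance = np.random.rand(100, 1).astype('float32')   # hypothetical relevance targets
model.fit(features, relevance, epochs=5, batch_size=16, verbose=0)
scores = model.predict(features)                        # one ranking score per item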
def __init__(self,
             learning_rate=None,
             vocab_size=None,
             embedding_size=None,
             rnn_output_size=None,
             dropout_rate=None,
             bidirectional_rnn=None,
             rnn_type=None,
             rnn_layers=None,
             l1_reg=None,
             l2_reg=None,
             initializer=None,
             word_vector_init=None):
    self.keras_model = None
    self.learning_rate = learning_rate or base_configuration["params"]["learning_rate"]
    self.vocab_size = vocab_size or base_configuration["params"]["vocab_size"]
    self.embedding_size = embedding_size or base_configuration["params"]["embedding_size"]
    self.rnn_output_size = rnn_output_size or base_configuration["params"]["rnn_output_size"]
    self.dropout_rate = dropout_rate or base_configuration["params"]["dropout_rate"]
    self.rnn_type = rnn_type or base_configuration["params"]["rnn_type"]
    self.rnn_layers = rnn_layers or base_configuration["params"]["rnn_layers"]
    self.word_vector_init = word_vector_init or base_configuration["params"]["word_vector_init"]

    self.initializer = initializer or base_configuration["params"]["initializer"]
    if self.initializer == 'vinyals_uniform':
        self.initializer = RandomUniform(-0.08, 0.08)

    self.bidirectional_rnn = bidirectional_rnn or base_configuration["params"]["bidirectional_rnn"]
    self.regularizer = l1_l2()  # (l1_reg, l2_reg)

    if self.vocab_size is None:
        raise ValueError(
            'config.active_config().vocab_size cannot be None! You should '
            'check your config or you can explicitly pass the vocab_size '
            'argument.')
    if self.rnn_type not in ('lstm', 'gru'):
        raise ValueError('rnn_type must be either "lstm" or "gru"!')
    if self.rnn_layers < 1:
        raise ValueError('rnn_layers must be >= 1!')
    if self.word_vector_init is not None and self.embedding_size != 300:
        raise ValueError(
            'If word_vector_init is not None, embedding_size must be 300')
def main():
    units = 512
    rng_units = 128
    z_k = 10
    pz_regularizer = BalanceRegularizer(1e-2)
    iwgan_weight = 1e-1
    initializer = RandomUniform(minval=-0.05, maxval=0.05)

    ((x, y), (xt, yt)) = mnist.load_data()
    x = np.float32(x) / 255.
    x = np.reshape(x, (x.shape[0], -1))
    input_units = 28 * 28

    classifier = MLP(input_units=input_units,
                     hidden_units=units,
                     output_units=z_k,
                     hidden_depth=2,
                     hidden_activation=leaky_relu,
                     initializer=initializer,
                     output_activation=softmax_nd)
    generator = MLP(input_units=units,
                    hidden_units=units,
                    output_units=input_units,
                    hidden_depth=2,
                    hidden_activation=leaky_relu,
                    initializer=initializer,
                    output_activation=T.nnet.sigmoid)
    discriminator = MLP(input_units=units,
                        hidden_units=units,
                        output_units=1,
                        hidden_depth=2,
                        initializer=initializer,
                        hidden_activation=leaky_relu)
    model = FCGAN(
        z_k=z_k,
        classifier=classifier,
        generator=generator,
        discriminator=discriminator,
        optd=Adam(1e-3),
        optg=Adam(1e-3),
        input_units=input_units,
        rng_units=rng_units,
        units=units,
        activation=leaky_relu,
        pz_regularizer=pz_regularizer,
        initializer=initializer,
        iwgan_weight=iwgan_weight
    )
    model.train(
        x=x,
        output_path='output/mnist_clustering',
        epochs=500,
        batches=512,
        discriminator_batches=5,
        batch_size=128
    )
def __init__(self, config):
    self.config = config
    recurrent_unit = self.config.recurrent_unit.lower()
    get_custom_objects().update({'EncoderSlice': EncoderSlice,
                                 'DecoderSlice': DecoderSlice})

    initial_weights = RandomUniform(minval=-0.08, maxval=0.08, seed=config.seed)

    stacked_input = Input(shape=(None,))
    # encoder_input = Lambda(lambda x: x[:, config.input_split_index:])(stacked_input)
    encoder_input = EncoderSlice(config.input_split_index)(stacked_input)
    encoder_embedding = Embedding(config.source_vocab_size,
                                  config.embedding_dim,
                                  weights=[config.source_embedding_map],
                                  trainable=False)
    encoder_embedded = encoder_embedding(encoder_input)

    if recurrent_unit == 'lstm':
        encoder = LSTM(self.config.hidden_dim,
                       return_state=True,
                       return_sequences=True,
                       recurrent_initializer=initial_weights)(encoder_embedded)
        for i in range(1, self.config.num_encoder_layers):
            encoder = LSTM(self.config.hidden_dim,
                           return_state=True,
                           return_sequences=True)(encoder)
        _, state_h, state_c = encoder
        encoder_states = [state_h, state_c]
    else:
        encoder = GRU(self.config.hidden_dim,
                      return_state=True,
                      return_sequences=True,
                      recurrent_initializer=initial_weights)(encoder_embedded)
        for i in range(1, self.config.num_encoder_layers):
            encoder = GRU(self.config.hidden_dim,
                          return_state=True,
                          return_sequences=True)(encoder)
        _, state_h = encoder
        encoder_states = [state_h]

    # decoder_input = Lambda(lambda x: x[:, config.input_split_index:])(stacked_input)
    decoder_input = DecoderSlice(config.input_split_index)(stacked_input)
    decoder_embedding = Embedding(config.target_vocab_size,
                                  config.embedding_dim,
                                  weights=[config.target_embedding_map],
                                  trainable=False)
    decoder_embedded = decoder_embedding(decoder_input)

    if recurrent_unit == 'lstm':
        decoder = LSTM(self.config.hidden_dim,
                       return_state=True,
                       return_sequences=True)(decoder_embedded,
                                              initial_state=encoder_states)
        for i in range(1, self.config.num_decoder_layers):
            decoder = LSTM(self.config.hidden_dim,
                           return_state=True,
                           return_sequences=True)(decoder)
        decoder_output, decoder_state = decoder[0], decoder[1:]
    else:
        decoder = GRU(self.config.hidden_dim,
                      return_state=True,
                      return_sequences=True)(decoder_embedded,
                                             initial_state=encoder_states)
        for i in range(1, self.config.num_decoder_layers):
            decoder = GRU(self.config.hidden_dim,
                          return_state=True,
                          return_sequences=True)(decoder)
        decoder_output, decoder_state = decoder[0], decoder[1]

    decoder_dense = Dense(config.target_vocab_size, activation='softmax')
    decoder_output = decoder_dense(decoder_output)

    self.model = Model(stacked_input, decoder_output)
    optimizer = Adam(lr=config.lr, clipnorm=25.)
    self.model.compile(optimizer=optimizer,
                       loss='categorical_crossentropy',
                       metrics=['acc'])
    print(self.model.summary())
def decoder(self, params):
    print('genetic_params:', params)
    NUM_NET_1 = 1  # for LR
    NUM_NET_2 = 8  # for the rest of the network params
    NUM_TIME = 3
    NUM_DELAY_TYPE = 3
    NUM_DELAY = 4

    # network params
    BATCH_SIZE = [16, 32, 64, 128]
    SEQ_LEN = [16, 32, 64, 128]
    STATE_SIZE = [16, 32, 64, 128]
    LR = list(np.logspace(-3, -6, 16))
    DR = [0.99, 0.98, 0.97, 0.96]
    PKEEP = [0.9, 0.8, 0.7, 0.6]
    ACTIVATION = ["relu", "tanh", "sigmoid", "softsign"]
    INIT = [zeros(), TruncatedNormal(), Orthogonal(), RandomUniform()]
    net_name = ['lr', 'batch_size', 'seq_len', 'state_size', 'dr', 'pkeep',
                'optimizer', 'activation_f', 'initializer']

    network_params = {}
    network_params['lr'] = LR[BitArray(params[0: NUM_NET_1 * 4]).uint]
    for i in range(NUM_NET_2):
        name = net_name[i + 1]
        network_params[name] = BitArray(params[4 + i * 2: 4 + i * 2 + 2]).uint
    network_params['batch_size'] = BATCH_SIZE[network_params['batch_size']]
    network_params['seq_len'] = SEQ_LEN[network_params['seq_len']]
    network_params['state_size'] = STATE_SIZE[network_params['state_size']]
    network_params['dr'] = DR[network_params['dr']]
    network_params['pkeep'] = PKEEP[network_params['pkeep']]
    network_params['activation_f'] = ACTIVATION[network_params['activation_f']]
    network_params['initializer'] = INIT[network_params['initializer']]

    # timeseries params
    timeseries_params = {}
    TIME_STEP_DAYS = [7, 14, 30, 60]
    TIME_STEP_WEEKS = [4, 8, 12, 24]
    TIME_STEP_MONTHS = [2, 3, 6, 9]
    TIME_STEP = [TIME_STEP_DAYS, TIME_STEP_WEEKS, TIME_STEP_MONTHS]
    step_name = ['time_series_step_days', 'time_series_step_weeks',
                 'time_series_step_months']
    for index in range(NUM_TIME):
        name = step_name[index]
        step = TIME_STEP[index]
        timeseries_params[name] = step[BitArray(params[20 + index * 2: 20 + index * 2 + 2]).uint]

    DELAY = [7, 14, 30, 60, 90, 120, 150, 180]
    delay_name_days = ['delay_google_days', 'delay_tweeter_days',
                       'delay_macro_days', 'delay_tweeter_re_days']
    delay_name_weeks = ['delay_google_weeks', 'delay_tweeter_weeks',
                        'delay_macro_weeks', 'delay_tweeter_re_weeks']
    delay_name_months = ['delay_google_months', 'delay_tweeter_months',
                         'delay_macro_months', 'delay_tweeter_re_months']
    delay_name = [delay_name_days, delay_name_weeks, delay_name_months]
    for type in range(NUM_DELAY_TYPE):
        name_list = delay_name[type]
        for index in range(NUM_DELAY):
            name = name_list[index]
            timeseries_params[name] = DELAY[BitArray(params[26 + index * 3: 26 + index * 3 + 3]).uint]

    return network_params, timeseries_params
def main():
    # XOR data set
    data = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.float32)
    target = np.array([[0], [1], [1], [0]], dtype=np.float32)

    # Show the 2D data
    colors = np.array([[1.0, 0.0, 0.0],   # Red
                       [0.0, 0.0, 1.0]])  # Blue
    c = colors[np.squeeze(target == 0).astype(np.int)]
    fig = plt.figure(figsize=(4, 4))
    ax = fig.add_subplot(111)
    ax.scatter(data[:, 0], data[:, 1], c=c, marker='x')
    ax.set_title('XOR dataset (2D)')
    ax.set_xlabel('First input')
    ax.set_ylabel('Second input')
    fig.tight_layout()
    plt.show()

    # Create neural network
    model = Sequential()
    model.add(
        Dense(units=2,
              activation='sigmoid',
              input_shape=(2,),
              kernel_initializer=RandomUniform(minval=-0.01, maxval=0.01)))
    model.add(Dense(units=1, activation='linear'))
    print(model.summary())

    # Define training parameters
    model.compile(optimizer=SGD(lr=0.5, momentum=0.9), loss='mse')

    # Perform training
    model.fit(data, target, batch_size=len(data), epochs=1000, shuffle=True, verbose=1)

    # Save trained model to disk
    model.save('xor.h5')

    # Test model (loading from disk)
    model = load_model('xor.h5')
    targetPred = model.predict(data)

    # Print the number of classification errors from the training data
    nbErrors = np.sum(np.round(targetPred) != target)
    accuracy = (len(data) - nbErrors) / len(data)
    print('Classification accuracy: %0.3f' % (accuracy))
def __init__(self,
             segments=2,
             alpha_initializer=RandomUniform(0., 1.),
             beta_initializer=RandomUniform(0., 1.),
             alpha_regularizer=l2(1e-3),
             beta_regularizer=l2(1e-3),
             shared_axes=None,
             **kwargs):
    super(APLU, self).__init__(**kwargs)
    self.segments = segments
    self.alpha_initializer = alpha_initializer
    self.beta_initializer = beta_initializer
    self.alpha_regularizer = alpha_regularizer
    self.beta_regularizer = beta_regularizer

    if shared_axes is None:
        self.shared_axes = None
    elif not isinstance(shared_axes, (list, tuple)):
        self.shared_axes = [shared_axes]
    else:
        self.shared_axes = list(shared_axes)