def train(model, preprocess, num_epochs, batch_size, num_steps, lr):
    optimizer = Adam(learning_rate=lr, clipnorm=0.01)
    now = time.time()
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    for epoch in range(num_epochs):
        l_sum = 0
        n = 0
        data_iter = preprocess.get_data_iter(batch_size=batch_size, num_steps=num_steps)
        for x, y in data_iter:
            # A non-persistent tape suffices: gradients are taken only once.
            with tf.GradientTape() as tape:
                x = tf.one_hot(x, depth=len(preprocess.idx_to_char))
                whole_sequence, _, _ = model(x)
                l = loss(y, whole_sequence)
            grads = tape.gradient(l, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            l_sum += np.array(l).item() * len(y)
            n += len(y)
        print('epoch %d, perplexity %f, time %.2f sec'
              % (epoch + 1, math.exp(l_sum / n), time.time() - now))
        print(predict_rnn('分开', 50, model, len(preprocess.idx_to_char),
                          preprocess.idx_to_char, preprocess.char_to_idx))
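# `predict_rnn` is called above but not defined in this snippet. A minimal
# sketch of what it might look like (greedy character-by-character sampling;
# the signature and the model's (output, state, state) return convention are
# assumptions inferred from the call site above):
def predict_rnn(prefix, num_chars, model, vocab_size, idx_to_char, char_to_idx):
    output = [char_to_idx[prefix[0]]]
    for t in range(num_chars + len(prefix) - 1):
        # Feed the last generated index as a batch of one length-1 sequence.
        x = tf.one_hot([[output[-1]]], depth=vocab_size)
        y, _, _ = model(x)
        if t < len(prefix) - 1:
            # Still consuming the prompt: append the next prompt character.
            output.append(char_to_idx[prefix[t + 1]])
        else:
            # Greedy decoding from the last timestep's distribution.
            output.append(int(np.argmax(np.array(y[0][-1]))))
    return ''.join(idx_to_char[i] for i in output)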
class ActorCritic:
    def __init__(self, n_actions, p_lr=.001, c_lr=.001):
        self.policy = Policy(n_actions=n_actions)
        self.critic = Critic()
        self.p_optim = Adam(learning_rate=p_lr)
        self.c_optim = Adam(learning_rate=c_lr)

    @tf.function
    def train_step(self, env, initial_state: tf.Tensor, gamma: float,
                   max_steps_per_episode: int) -> tf.Tensor:
        episode_reward = tf.constant(0.0)
        state = initial_state
        for t in range(max_steps_per_episode):
            with tf.GradientTape() as tape, tf.GradientTape() as tape2:
                results = run_episode_step(env, state, gamma, max_steps_per_episode)
                next_state, done, action_probs, value, next_value, reward = results
                done, action_probs, value, next_value, reward = [
                    tf.expand_dims(x, 1)
                    for x in [done, action_probs, value, next_value, reward]
                ]
                actor_loss = compute_loss(action_probs, value, next_value, reward, done)
                critic_loss = huber_loss(
                    value, reward + tf.cast(gamma, tf.float32) * next_value)
            actor_grads = tape.gradient(actor_loss, self.policy.trainable_variables)
            critic_grads = tape2.gradient(critic_loss, self.critic.trainable_variables)
            self.p_optim.apply_gradients(zip(actor_grads, self.policy.trainable_variables))
            self.c_optim.apply_gradients(zip(critic_grads, self.critic.trainable_variables))
            episode_reward += tf.reduce_sum(reward)
            # Advance the environment state before the next step.
            state = next_state
            if tf.cast(done, tf.bool):
                break
        return episode_reward
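# `run_episode_step`, `compute_loss`, and `huber_loss` are not defined in this
# snippet. A plausible sketch of the two loss helpers for a one-step advantage
# actor-critic; `done` is assumed to be a 0/1 float tensor, and `gamma` gets a
# default since the call site above does not pass it:
huber_loss = tf.keras.losses.Huber(reduction=tf.keras.losses.Reduction.SUM)

def compute_loss(action_probs, value, next_value, reward, done, gamma=0.99):
    # One-step TD target; no bootstrapping on terminal transitions.
    target = reward + (1.0 - done) * gamma * next_value
    advantage = tf.stop_gradient(target - value)
    # Policy-gradient loss weighted by the advantage.
    return -tf.reduce_sum(tf.math.log(action_probs + 1e-8) * advantage)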
def pre_train(generator, train_dataset, valid_dataset, steps,
              evaluate_every=1, lr_rate=1e-4):
    loss_mean = Mean()
    pre_train_loss = MeanSquaredError()
    pre_train_optimizer = Adam(lr_rate)
    now = time.perf_counter()
    step = 0
    for lr, hr in train_dataset.take(steps):
        step += 1
        with tf.GradientTape() as tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)
            sr = generator(lr, training=True)
            loss_value = pre_train_loss(hr, sr)
        gradients = tape.gradient(loss_value, generator.trainable_variables)
        pre_train_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        loss_mean(loss_value)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            psnr_value = evaluate(generator, valid_dataset)
            duration = time.perf_counter() - now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, '
                  f'PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            now = time.perf_counter()
def train(self, X, Y, learningRate, indexes=None):
    # TODO: L2 regularization is currently out of order and will be fixed
    # later. It matters for avoiding overfitting; when restored it would read:
    #   regularizer = l2(L2val)  # i.e. tf.keras.regularizers.l2
    #   regularizer(self.weights)
    # Compute gradients of weights and biases.
    with GradientTape() as g:
        myTrainableVariables = self.getTrainableVariables()
        g.watch(myTrainableVariables)
        if self.debug:
            print("EXECUTING")
        guess = self.evaluate(X)
        # Calculate error using the configured error function (squared error).
        if self.debug:
            print("TRAINING")
        if self.errorFunction.multipleLabels:
            error = self.errorFunction.execute(guess, Y)
        else:
            error = self.errorFunction.execute(guess, Y, indexes)
    # Note: a fresh Adam per call discards the optimizer's moment estimates;
    # reusing one optimizer across calls would be preferable.
    optimizer = Adam(learningRate)
    grads = g.gradient(error, myTrainableVariables)
    optimizer.apply_gradients(zip(grads, myTrainableVariables))
    return error
class Collection_Critic(tf.keras.Model):
    def __init__(self, critics):
        super().__init__(name="Coll_Critic")
        self.crit = critics
        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.n = 0

    def update_current_n_layer(self):
        self.n += 1

    def start_fading(self, n):
        self.crit[len(self.crit) - 1 - n].activate_fade_in()
        self.update_current_n_layer()

    def stop_fading(self, n):
        self.crit[len(self.crit) - 1 - n].disactivate_fade_in()

    def call(self, input_tensor):
        # Run only the currently active critic stages, newest first.
        x = input_tensor
        for i in range(len(self.crit) - 1 - self.n, len(self.crit)):
            x = self.crit[i](x)
        return x

    def compute_loss(self, y_true, y_pred):
        """Wasserstein loss."""
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
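# Usage sketch for Collection_Critic. `critics`, `real_images`, and
# `fake_images` are hypothetical; with Wasserstein labels, real samples use -1
# and fake samples +1 (or the reverse), so minimizing mean(y_true * y_pred)
# pushes the two score distributions apart.
collection_critic = Collection_Critic(critics)
with tf.GradientTape() as tape:
    real_scores = collection_critic(real_images)
    fake_scores = collection_critic(fake_images)
    loss = (collection_critic.compute_loss(-tf.ones_like(real_scores), real_scores)
            + collection_critic.compute_loss(tf.ones_like(fake_scores), fake_scores))
grads = tape.gradient(loss, collection_critic.trainable_variables)
collection_critic.backPropagate(grads, collection_critic.trainable_variables)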
def train(self, X, Y, Yi, learningRate, L2val):
    # TODO: L2 regularization is currently out of order and will be fixed
    # later. It matters for avoiding overfitting; when restored it would read:
    #   regularizer = l2(L2val)  # i.e. tf.keras.regularizers.l2
    #   regularizer(self.weights)
    # Compute gradients of weights and biases.
    with GradientTape() as g:
        # Watching once suffices; the per-layer loop watched the same
        # variables repeatedly.
        g.watch(self.getTrainableVariables())
        # Convert everything in X to TensorFlow constants.
        guess = self.evaluate([[constant(j) for j in i] for i in X])
        # Calculate error using squared error.
        error = 0
        for i in range(len(Y)):
            error += (guess[i][Yi[i]] - Y[i][Yi[i]])**2
        error = error / len(Y)
    optimizer = Adam(learningRate)
    grads = g.gradient(error, self.getTrainableVariables())
    optimizer.apply_gradients(zip(grads, self.getTrainableVariables()))
    return error
def fit(self, batch, epochs=10, batch_size=32, verbose=True, **kwargs):
    """
    Fit model on given batch

    Parameters:
        batch: List of tuples: (states, game result (value), node values (probs))
        epochs: Number of epochs to train for (optional, default=10)
        batch_size: (optional, default=32)
    """
    optimizer = Adam(**kwargs)
    for e in range(epochs):
        batch_sample = random.sample(batch, batch_size)
        states, true_values, true_probs = self.transform_batch(batch_sample)
        with tf.GradientTape() as tape:
            pred_values, pred_probs = self(states)
            # Add a square to give more importance to the value loss.
            value_loss = tf.math.square(
                tf.keras.losses.mean_squared_error(true_values, pred_values))
            # Cross-entropy must be minimized, so it enters the total loss
            # with a positive sign (negating it would push the policy away
            # from the targets).
            prob_loss = tf.keras.losses.categorical_crossentropy(
                true_probs, pred_probs)
            total_loss = value_loss + prob_loss
        gradients = tape.gradient(total_loss, self.trainable_variables)
        optimizer.apply_gradients(zip(gradients, self.trainable_variables))
        if verbose:
            tf.print("Probs loss:", sum(prob_loss),
                     "Value loss:", sum(value_loss),
                     "Total loss:", sum(total_loss))
class REINFORCEAgent:
    def __init__(self, state_size, action_size):
        # Define state and action sizes.
        self.state_size = state_size
        self.action_size = action_size

        # REINFORCE hyperparameters.
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        self.model = REINFORCE(self.action_size)
        self.optimizer = Adam(learning_rate=self.learning_rate)
        self.states, self.actions, self.rewards = [], [], []

    # Select an action stochastically from the policy network's output.
    def get_action(self, state):
        policy = self.model(state)[0]
        policy = np.array(policy)
        return np.random.choice(self.action_size, 1, p=policy)[0]

    # Compute discounted returns.
    def discount_rewards(self, rewards):
        discounted_rewards = np.zeros_like(rewards)
        running_add = 0
        for t in reversed(range(0, len(rewards))):
            running_add = running_add * self.discount_factor + rewards[t]
            discounted_rewards[t] = running_add
        return discounted_rewards

    # Store the states, actions, and rewards of one episode.
    def append_sample(self, state, action, reward):
        self.states.append(state[0])
        self.rewards.append(reward)
        act = np.zeros(self.action_size)
        act[action] = 1
        self.actions.append(act)

    # Update the policy network.
    def train_model(self):
        discounted_rewards = np.float32(self.discount_rewards(self.rewards))
        discounted_rewards -= np.mean(discounted_rewards)
        discounted_rewards /= np.std(discounted_rewards)

        # Compute the cross-entropy loss.
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            policies = self.model(np.array(self.states))
            actions = np.array(self.actions)
            action_prob = tf.reduce_sum(actions * policies, axis=1)
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            loss = tf.reduce_sum(cross_entropy * discounted_rewards)
            entropy = -policies * tf.math.log(policies)

        # Update the model in the direction that reduces the loss.
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        self.states, self.actions, self.rewards = [], [], []
        return np.mean(entropy)
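# Worked example of discount_rewards with discount_factor = 0.99: for
# rewards [1, 1, 1] the returns are accumulated back-to-front:
#   G2 = 1
#   G1 = 1 + 0.99 * 1    = 1.99
#   G0 = 1 + 0.99 * 1.99 = 2.9701
# i.e. discount_rewards([1, 1, 1]) -> [2.9701, 1.99, 1.0], which is then
# standardized before weighting the log-probabilities in train_model.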
def train(generator, discriminator, train_ds, valid_ds, steps=2000, lr_rate=1e-4):
    generator_optimizer = Adam(learning_rate=lr_rate)
    discriminator_optimizer = Adam(learning_rate=lr_rate)
    # Use a distinct name: assigning to `vgg` inside the function would make
    # it a local and `vgg()` would raise UnboundLocalError.
    vgg_model = vgg()
    pls_metric = tf.keras.metrics.Mean()
    dls_metric = tf.keras.metrics.Mean()
    step = 0

    for lr, hr in train_ds.take(steps):
        step += 1
        with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)

            # Forward pass
            sr = generator(lr, training=True)
            hr_output = discriminator(hr, training=True)
            sr_output = discriminator(sr, training=True)

            # Compute losses
            con_loss = content_loss(vgg_model, hr, sr)
            gen_loss = generator_loss(sr_output)
            perc_loss = con_loss + 0.001 * gen_loss
            disc_loss = discriminator_loss(hr_output, sr_output)

        # Gradient of perceptual loss w.r.t. generator weights
        gradients_of_generator = gen_tape.gradient(
            perc_loss, generator.trainable_variables)
        # Gradient of discriminator loss w.r.t. discriminator weights
        gradients_of_discriminator = disc_tape.gradient(
            disc_loss, discriminator.trainable_variables)

        # Update weights of generator and discriminator
        generator_optimizer.apply_gradients(
            zip(gradients_of_generator, generator.trainable_variables))
        discriminator_optimizer.apply_gradients(
            zip(gradients_of_discriminator, discriminator.trainable_variables))

        pls_metric(perc_loss)
        dls_metric(disc_loss)
        print(f'{step}/{steps}, perceptual loss = {pls_metric.result():.4f}, '
              f'discriminator loss = {dls_metric.result():.4f}')
        pls_metric.reset_states()
        dls_metric.reset_states()

    generator.save_weights('pre-trained/generator.h5')
    discriminator.save_weights('pre-trained/discriminator.h5')
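# The loss helpers used above are not defined in this snippet. Plausible
# SRGAN-style definitions (a sketch under those assumptions, not necessarily
# the original implementations; the discriminator is assumed to end in a
# sigmoid, hence from_logits=False):
binary_cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)
mse = tf.keras.losses.MeanSquaredError()

def content_loss(vgg_model, hr, sr):
    # Perceptual loss measured in VGG feature space.
    return mse(vgg_model(hr), vgg_model(sr))

def generator_loss(sr_output):
    # The generator wants the discriminator to label SR images as real.
    return binary_cross_entropy(tf.ones_like(sr_output), sr_output)

def discriminator_loss(hr_output, sr_output):
    # Real HR images should score 1, super-resolved images 0.
    return (binary_cross_entropy(tf.ones_like(hr_output), hr_output)
            + binary_cross_entropy(tf.zeros_like(sr_output), sr_output))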
class TrainAgent:
    def __init__(self, env: str, episodes=1000, alpha=0.01, gamma=0.9,
                 alpha_decay_rate=0.9):
        self.env = Environment(env=env)
        self.episodes = episodes
        self.lr = ExponentialDecay(alpha, episodes, alpha_decay_rate)
        self.optimizer = Adam(self.lr)
        self.action_count, self.states_count = self.env.spaces_count()
        self.gamma = gamma
        self._net = ReinforcePolicyNet(action_count=self.action_count,
                                       states_count=self.states_count)
        self._model = ReinforcePolicyModel(self._net)
        self._agent = ReinforcePolicyAgent(env=self.env, model=self._model,
                                           gamma=gamma)
        self.huber_loss = Huber(reduction=tf.keras.losses.Reduction.SUM)

    def compute_loss(self, action_prob, epi_return, values):
        """
        `action_prob` is the policy's probability for each action taken.
        The actor loss is -mean(log pi(a|s) * (Gt - Bt)), which is minimized;
        the baseline Bt (here the critic's value estimate) reduces variance.
        """
        advantage = epi_return - values
        prob = tf.math.log(action_prob + 1e-30)
        actor_loss = -tf.math.reduce_mean(prob * advantage)
        critic_loss = self.huber_loss(values, epi_return)
        return critic_loss + actor_loss

    @tf.function
    def train_step(self, init_state: tf.Tensor):
        with tf.GradientTape() as tape:
            episode_return, action_probs, rewards, values = self._agent.run(
                max_steps=200, init_state=init_state)
            loss = self.compute_loss(action_prob=action_probs,
                                     epi_return=episode_return, values=values)
        grads = tape.gradient(loss, self._net.trainable_variables)
        self.optimizer.apply_gradients(zip(grads, self._net.trainable_variables))
        episode_rewards = tf.math.reduce_sum(rewards)
        return episode_rewards

    @staticmethod
    def plot_me(total, avg, cnt):
        plt.clf()
        plt.plot(cnt, total, label='rewards')
        plt.plot(cnt, avg, label='average reward')
        plt.legend()
        plt.pause(0.01)

    def run(self):
        e_r = []
        count = []
        avg_reward = []
        for episode in range(self.episodes):
            init_state = tf.constant(self.env.reset_env(), dtype=tf.float32)
            e_r.append(int(self.train_step(init_state)))
            count.append(episode)
            avg = sum(e_r) / len(count)
            avg_reward.append(avg)
            self.plot_me(e_r, avg_reward, count)
            print(f"episode {episode}/{self.episodes}, reward: {e_r[episode]}")
def _train_vanilla_gan_on_mnist(args):
    model_name = args.model_name
    n_epochs = args.n_epochs
    batch_size = args.batch_size
    # args is a Namespace; vars() turns it into keyword arguments.
    generator_model = GeneratorModelMNIST(**vars(args))
    discriminator_model = DiscriminatorModelMNIST(**vars(args))
    generator_optimizer = Adam(1e-4)
    discriminator_optimizer = Adam(1e-4)
    data_generator = get_mnist_dataset()
    noise_dim = args.generator_noise_dim
    num_examples_to_generate = args.num_examples_to_generate
    seed = tf.random.normal([num_examples_to_generate, noise_dim])
    plotting_callback = ml_utils.PlotAndSaveImages(test_input=seed,
                                                   model=generator_model,
                                                   model_name=model_name)
    gen_ckpt = ml_utils.SimpleModelCheckPoint(model_name="mnist_generator",
                                              model=generator_model)
    disc_ckpt = ml_utils.SimpleModelCheckPoint(model_name="mnist_discriminator",
                                               model=discriminator_model)
    for epoch in range(n_epochs):
        start = time()
        for i, image_batch in enumerate(data_generator):
            # Fresh tapes per step: a non-persistent tape can only yield
            # gradients once, so the tapes must live inside the loop.
            with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
                input_noise = tf.random.uniform(shape=(image_batch.shape[0], noise_dim))
                generated_images = generator_model(input_noise, training=True)
                true_output = discriminator_model(image_batch, training=True)
                fake_output = discriminator_model(generated_images, training=True)
                gen_loss = generator_loss(fake_output)
                disc_loss = discriminator_loss(true_output, fake_output)
            gen_gradients = gen_tape.gradient(
                gen_loss, generator_model.trainable_variables)
            disc_gradients = disc_tape.gradient(
                disc_loss, discriminator_model.trainable_variables)
            generator_optimizer.apply_gradients(
                zip(gen_gradients, generator_model.trainable_variables))
            discriminator_optimizer.apply_gradients(
                zip(disc_gradients, discriminator_model.trainable_variables))
            general_utils.smart_print(start, len(data_generator), i, epoch,
                                      n_epochs, gen_loss, disc_loss)
        gen_ckpt.on_epoch_end(epoch=epoch)
        disc_ckpt.on_epoch_end(epoch=epoch)
    plotting_callback.on_train_end()
class IVModel(Model):
    def __init__(self):
        super().__init__()

    def call(self, x):
        for layer in self.h_layers:
            x = layer(x)
        return x

    def _add_loss(self, loss):
        self.loss = loss

    def _set_lr(self, lr):
        if lr is not None:
            self.optimizer = Adam(learning_rate=lr)

    def train_step(self, x_batch):
        total_increment = tf.squeeze(x_batch[:, -1, 0] - x_batch[:, 0, 0])
        with tf.GradientTape() as tape:
            int_var = self(x_batch)
            int_var = tf.squeeze(int_var)
            loss_value = self.loss(total_increment, int_var)
        grads = tape.gradient(loss_value, self.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.trainable_weights))
        return loss_value

    def train(self, x_train, num_epochs, batch_size, lr=None, true_int_var=None,
              show_loss=True, show_hist=False):
        self._set_lr(lr)
        n_steps = x_train.shape[0] // batch_size
        x_train = create_train_dataset(x_train, batch_size)
        losses = []
        mses = []
        for epoch in range(num_epochs):
            with tqdm_notebook(total=n_steps,
                               desc=f'Epoch {epoch+1} of {num_epochs}') as progress:
                for step, x_batch in enumerate(x_train):
                    progress.update()
                    loss_val = self.train_step(x_batch)
                    losses.append(loss_val.numpy())
            if show_loss:
                plot_loss(losses)
            int_var = self(x_batch).numpy().squeeze()
            # Test against None explicitly: truthiness of an array is ambiguous.
            if true_int_var is not None:
                mse_val = _mse_metric(true_int_var, int_var)
                mses.append(mse_val.numpy())
            if show_hist:
                plot_hist(x_batch, int_var, true_int_var)
        self.history = {'loss': losses, 'mse': mses}
        return self.history

    def predict_iv(self, x):
        iv = self(x).numpy()
        return iv.squeeze()

    def predict_z(self, x):
        iv = self.predict_iv(x)
        z = (x[:, -1] - x[:, 0]) / np.sqrt(iv)
        return z
class DQN:
    def __init__(self, num_states, num_actions, model, target_model, buffer,
                 gamma, batch_size, learning_rate, min_experience):
        self.num_states = num_states
        self.num_actions = num_actions
        self.model = model
        self.target_model = target_model
        self.buffer = buffer
        self.gamma = gamma
        self.batch_size = batch_size
        self.optimizer = Adam(learning_rate=learning_rate)
        self.min_experience = min_experience

    def predict(self, state):
        return self.model(np.atleast_2d(state.astype('float32')))

    def update_model(self, target_model):
        if len(self.buffer.buffer['state']) < self.min_experience:
            return 0

        # Sample a mini-batch from the replay buffer.
        index = np.random.randint(low=0, high=len(self.buffer.buffer['state']),
                                  size=self.batch_size)
        states = np.asarray([self.buffer.buffer['state'][i] for i in index])
        actions = np.asarray([self.buffer.buffer['action'][i] for i in index])
        rewards = np.asarray([self.buffer.buffer['reward'][i] for i in index])
        next_states = np.asarray([self.buffer.buffer['next_state'][i] for i in index])
        dones = np.asarray([self.buffer.buffer['done'][i] for i in index])

        next_action_values = np.max(target_model.predict(next_states), axis=1)
        # np.where: where the first argument is true, choose the second
        # argument, otherwise the third. done = True marks a terminal state,
        # so we keep only the reward, with no discounted action values from
        # the next state.
        target_values = np.where(dones, rewards,
                                 rewards + self.gamma * next_action_values)

        # Update neural network weights.
        with tf.GradientTape() as tape:
            action_values = tf.math.reduce_sum(
                self.predict(states) * tf.one_hot(actions, self.num_actions),
                axis=1)
            # The Q network is trained by minimising the squared TD error.
            loss = tf.math.reduce_mean(tf.square(target_values - action_values))
        # Gradient descent by differentiating the loss w.r.t. the weights.
        variables = self.model.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss
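# Worked example of the target computation above, with gamma = 0.99:
#   dones              = [False, True]
#   rewards            = [1.0,   1.0]
#   next_action_values = [2.0,   5.0]
# np.where keeps rewards + gamma * next_action_values for non-terminal
# transitions and the bare reward for terminal ones:
#   target_values = [1.0 + 0.99 * 2.0, 1.0] = [2.98, 1.0]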
class Critic(tf.keras.Model):
    def __init__(self, model_parameters=None):
        super().__init__(name="critic")
        if model_parameters is None:
            model_parameters = {
                'lr': 0.0001,
                'beta1': 0,
                'batch_size': 64,
                'latent_dim': 128,
                'image_size': 152
            }
        self.layers_blocks = list()
        self.model_parameters = model_parameters
        # Integer division: Conv2D expects an integer filter count.
        dim = model_parameters['batch_size'] // 2
        init = RandomNormal(stddev=0.02)

        # Layers
        self.conv_1 = Conv2D(dim, (5, 5), strides=(2, 2), padding='same',
                             kernel_initializer=init,
                             input_shape=[model_parameters['image_size'],
                                          model_parameters['image_size'], 3])
        self.leaky_1 = LeakyReLU(alpha=0.2)

        number_of_layers_needed = int(math.log(model_parameters['image_size'], 2)) - 3
        for i in range(number_of_layers_needed):
            dim *= 2
            self.layers_blocks.append([
                Conv2D(dim, (5, 5), strides=(2, 2), padding='same',
                       kernel_initializer=init),
                LayerNormalization(),
                LeakyReLU(alpha=0.2)
            ])
        self.flat = Flatten()
        self.logits = Dense(1)  # This neuron tells us how real or fake the input is
        self.optimizer = Adam(learning_rate=model_parameters['lr'],
                              beta_1=model_parameters['beta1'], beta_2=0.9)

    def call(self, input_tensor, training=True):
        # Definition of the forward pass
        x = self.leaky_1(self.conv_1(input_tensor))
        for layers_block in self.layers_blocks:
            for layer in layers_block:
                x = layer(x, training=training)
        x = self.flat(x)
        return self.logits(x)

    def compute_loss(self, y_true, y_pred):
        """Wasserstein loss."""
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def save_optimizer(self):
        weights = self.optimizer.get_weights()
        data_access.store_weights_in_file('c_optimizer_weights', weights)
def lipschitz_lb(f, X1, X2, iterations=1000, verbose=True):
    optimizer = Adam(learning_rate=0.0001)
    X1 = tf.Variable(X1, name='x1', dtype='float32')
    X2 = tf.Variable(X2, name='x2', dtype='float32')
    max_L = None
    if verbose:
        pb = Progbar(iterations, stateful_metrics=['LC'])
    for _ in range(iterations):
        with tf.GradientTape() as tape:
            y1 = f(X1)
            y2 = f(X2)
            # The definition of the margin is not entirely symmetric: the top
            # class must remain the same when measuring both points. We assume
            # X1 is the reference point for determining the top class.
            original_predictions = tf.cast(
                tf.equal(y1, tf.reduce_max(y1, axis=1, keepdims=True)), 'float32')
            # This takes the logit at the top class for both X1 and X2.
            y1_j = tf.reduce_sum(y1 * original_predictions, axis=1, keepdims=True)
            y2_j = tf.reduce_sum(y2 * original_predictions, axis=1, keepdims=True)
            margin1 = y1_j - y1
            margin2 = y2_j - y2
            axes = tuple((tf.range(len(X1.shape) - 1) + 1).numpy())
            L = tf.abs(margin1 - margin2) / (tf.sqrt(
                tf.reduce_sum((X1 - X2)**2, axis=axes)) + EPS)[:, None]
            # Maximize L by minimizing its negation.
            loss = -tf.reduce_max(L, axis=1)
        grad = tape.gradient(loss, [X1, X2])
        optimizer.apply_gradients(zip(grad, [X1, X2]))
        if max_L is None:
            max_L = L
        else:
            max_L = tf.maximum(max_L, L)
        if verbose:
            pb.add(1, [('LC', tf.reduce_max(max_L))])
    return tf.reduce_max(max_L)
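# Usage sketch: lower-bound the local Lipschitz constant of a trained
# classifier around a batch of inputs. `model` and `x_batch` are hypothetical;
# X2 starts as a small perturbation of X1, and both endpoints are then
# optimized to widen the margin difference per unit distance.
x1 = x_batch
x2 = x_batch + 0.01 * np.random.randn(*x_batch.shape).astype('float32')
lb = lipschitz_lb(model, x1, x2, iterations=500)
print('Lipschitz lower bound:', float(lb))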
class ReinforceBaseLine(Reinforce):
    def __init__(self, env, actor_lr, critic_lr, policy, critic, gamma,
                 max_episodes, max_eps_steps):
        super().__init__(env, actor_lr, policy, gamma, max_episodes, max_eps_steps)
        self.critic = critic
        self.critic_optimizer = Adam(critic_lr)

    def _run_episode(self):
        state = tf.constant(self.env.reset(), dtype=tf.float32)
        rewards = tf.TensorArray(tf.float32, 0, True)
        action_probs = tf.TensorArray(tf.float32, 0, True)
        state_values = tf.TensorArray(tf.float32, 0, True)
        state_shape = state.shape
        for step in tf.range(self.max_eps_steps):
            action, action_logits_step = self.get_action(state)
            action_probs_step = tf.nn.softmax(action_logits_step)[0, action]
            state, reward, done = self.tf_env_step(action)
            value = self.critic(tf.expand_dims(state, 0))
            self.steps_taken += 1
            action_probs = action_probs.write(step, action_probs_step)
            rewards = rewards.write(step, reward)
            state_values = state_values.write(step, value)
            state.set_shape(state_shape)
            if tf.cast(done, tf.bool):
                break
        return action_probs.stack(), rewards.stack(), state_values.stack()

    def train(self):
        with tf.GradientTape() as tape, tf.GradientTape() as tape2:
            action_probs, rewards, values = self._run_episode()
            discounted_rewards = compute_discounted_rewards(rewards, self.gamma)
            policy_loss = _compute_policy_loss(action_probs,
                                               discounted_rewards, values)
            critic_loss = huber_loss(values, discounted_rewards)
        policy_grads = tape.gradient(policy_loss, self.policy.trainable_variables)
        critic_grads = tape2.gradient(critic_loss, self.critic.trainable_variables)
        self.policy_optimizer.apply_gradients(
            zip(policy_grads, self.policy.trainable_variables))
        self.critic_optimizer.apply_gradients(
            zip(critic_grads, self.critic.trainable_variables))
        return rewards
class ContinuousA2CAgent:
    def __init__(self, action_size, max_action):
        self.render = False
        # Define the action size and bound.
        self.action_size = action_size
        self.max_action = max_action

        # Actor-critic hyperparameters.
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        # Create the policy and value networks.
        self.model = ContinuousA2C(self.action_size)
        # Configure the optimizer; clipnorm keeps gradients from blowing up.
        self.optimizer = Adam(learning_rate=self.learning_rate, clipnorm=1.0)

    # Sample an action stochastically from the policy network's output.
    def get_action(self, state):
        mu, sigma, _ = self.model(state)
        dist = tfd.Normal(loc=mu[0], scale=sigma[0])
        action = dist.sample([1])[0]
        action = np.clip(action, -self.max_action, self.max_action)
        return action

    # Update the policy and value networks at every timestep.
    def train_model(self, state, action, reward, next_state, done):
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            mu, sigma, value = self.model(state)
            _, _, next_value = self.model(next_state)
            target = reward + (1 - done) * self.discount_factor * next_value[0]

            # Policy network loss.
            advantage = tf.stop_gradient(target - value[0])
            dist = tfd.Normal(loc=mu, scale=sigma)
            action_prob = dist.prob([action])[0]
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            actor_loss = tf.reduce_mean(cross_entropy * advantage)

            # Value network loss.
            critic_loss = 0.5 * tf.square(tf.stop_gradient(target) - value[0])
            critic_loss = tf.reduce_mean(critic_loss)

            # Combine into a single loss.
            loss = 0.1 * actor_loss + critic_loss

        # Update the model in the direction that reduces the loss.
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        return loss, sigma
class Leaner:
    def __init__(self, config: MuZeroConfig, storage: SharedStorage,
                 replay_buffer: ReplayBuffer):
        self.config = config
        self.storage = storage
        self.replay_buffer = replay_buffer
        self.summary = create_summary(name="leaner")
        self.metrics_loss = Mean('leaner-loss', dtype=tf.float32)
        self.network = Network(self.config)
        self.lr_schedule = ExponentialDecay(
            initial_learning_rate=self.config.lr_init,
            decay_steps=self.config.lr_decay_steps,
            decay_rate=self.config.lr_decay_rate)
        self.optimizer = Adam(learning_rate=self.lr_schedule)

    def start(self):
        while self.network.training_steps() < self.config.training_steps:
            if ray.get(self.replay_buffer.size.remote()) > 0:
                self.train()
                if self.network.training_steps() % self.config.checkpoint_interval == 0:
                    weights = self.network.get_weights()
                    self.storage.update_network.remote(weights)
                if self.network.training_steps() % self.config.save_interval == 0:
                    self.network.save()
        print("Finished")

    def train(self):
        batch = ray.get(self.replay_buffer.sample_batch.remote())
        with tf.GradientTape() as tape:
            loss = self.network.loss_function(batch)
        grads = tape.gradient(loss, self.network.get_variables())
        self.optimizer.apply_gradients(zip(grads, self.network.get_variables()))
        self.metrics_loss(loss)
        with self.summary.as_default():
            tf.summary.scalar('loss', self.metrics_loss.result(),
                              self.network.training_steps())
            self.metrics_loss.reset_states()
        self.network.update_training_steps()
class Generator(tf.keras.Model):
    def __init__(self, latent_dim=256, batch_size=64,
                 channels=[32, 64, 64, 128, 128]):
        super().__init__(name="Generator")
        cc = channels[-1]
        self.batch_size = batch_size
        self.latent_dim = latent_dim
        self.inp = InputLayer(input_shape=(self.latent_dim,))
        self.dense_1 = Dense(8 * batch_size * 5 * 5, name='Generator_Dense_1')
        self.relu = ReLU()
        self.reshape_1 = Reshape((5, 5, 8 * batch_size))
        self.reses = list()
        for ch in reversed(channels[:-1]):
            self.reses.append([ResidualBlock(cc, ch), UpSampling2D()])
            cc = ch
        self.res_block_n = ResidualBlock(cc, cc)
        self.toRGB = Conv2D(3, (3, 3), activation='tanh', padding='same',
                            use_bias=False, name='Generator_To_RGB')
        self.optimizer = Adam(learning_rate=0.0002, beta_1=0.5, beta_2=0.9)

    def call(self, input_tensor, training=True):
        x = self.inp(input_tensor)
        x = self.dense_1(x)
        x = self.relu(x)
        x = self.reshape_1(x)
        for i in range(len(self.reses)):
            x = self.reses[i][0](x, training=training)
            x = self.reses[i][1](x, training=training)
            if i == 1:
                # Crop one row/column so the spatial size matches the target.
                x = Cropping2D(cropping=((1, 0), (1, 0)))(x)
        x = self.res_block_n(x, training=training)
        x = self.toRGB(x)
        return x

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def set_seed(self):
        self.seed = tf.random.normal([self.batch_size, self.latent_dim])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')
class Generator(tf.keras.Model):
    def __init__(self, random_noise_size=128, batch_s=64):
        super().__init__(name='generator')
        # Layers
        init = RandomNormal(stddev=0.02)
        dim = 4 * batch_s
        self.dense_1 = Dense(7 * 7 * dim, use_bias=False,
                             input_shape=(random_noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = LeakyReLU(alpha=0.2)
        self.reshape_1 = Reshape((7, 7, dim))

        self.up_2 = UpSampling2D((1, 1), interpolation='nearest')
        # Integer division: Conv2D expects an integer filter count.
        self.conv2 = Conv2D(dim // 2, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.batchNorm2 = BatchNormalization()
        self.leaky_2 = LeakyReLU(alpha=0.2)

        self.up_3 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv3 = Conv2D(dim // 4, (5, 5), strides=(1, 1), padding="same",
                            use_bias=False, kernel_initializer=init)
        self.batchNorm3 = BatchNormalization()
        self.leaky_3 = LeakyReLU(alpha=0.2)

        self.up_4 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv4 = Conv2D(1, (5, 5), activation='tanh', strides=(1, 1),
                            padding="same", use_bias=False, kernel_initializer=init)

        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.seed = tf.random.normal([batch_s, random_noise_size])

    def call(self, input_tensor):
        # Definition of the forward pass
        x = self.leaky_1(self.batchNorm1(self.reshape_1(self.dense_1(input_tensor))))
        x = self.leaky_2(self.batchNorm2(self.conv2(self.up_2(x))))
        x = self.leaky_3(self.batchNorm3(self.conv3(self.up_3(x))))
        x = self.conv4(self.up_4(x))
        return x

    def generate_noise(self, batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def compute_loss(self, y_true, y_pred, class_wanted, class_prediction):
        """Wasserstein loss plus a KL penalty on the classifier's prediction."""
        k = 10  # hyper-parameter
        kl = KLDivergence()
        return backend.mean(y_true * y_pred) + k * kl(class_wanted, class_prediction)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
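# Usage sketch: generate a batch of images from noise (names below are
# hypothetical). The 7x7 feature map is upsampled twice by a factor of 2, so
# the output is 28x28x1 in the tanh range [-1, 1].
generator = Generator(random_noise_size=128, batch_s=64)
noise = generator.generate_noise(batch_size=64, random_noise_size=128)
fake_images = generator(noise)  # shape: (64, 28, 28, 1)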
class A2CAgent:
    def __init__(self, action_size):
        self.render = False
        # Define the action size.
        self.action_size = action_size

        # Actor-critic hyperparameters.
        self.discount_factor = 0.99
        self.learning_rate = 0.001

        # Create the policy and value networks.
        self.model = A2C(self.action_size)
        # Configure the optimizer; clipnorm keeps gradients from blowing up.
        self.optimizer = Adam(learning_rate=self.learning_rate, clipnorm=5.0)

    # Select an action stochastically from the policy network's output.
    def get_action(self, state):
        policy, _ = self.model(state)
        policy = np.array(policy[0])
        return np.random.choice(self.action_size, 1, p=policy)[0]

    # Update the policy and value networks at every timestep.
    def train_model(self, state, action, reward, next_state, done):
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            policy, value = self.model(state)
            _, next_value = self.model(next_state)
            target = reward + (1 - done) * self.discount_factor * next_value[0]

            # Policy network loss.
            one_hot_action = tf.one_hot([action], self.action_size)
            action_prob = tf.reduce_sum(one_hot_action * policy, axis=1)
            cross_entropy = -tf.math.log(action_prob + 1e-5)
            advantage = tf.stop_gradient(target - value[0])
            actor_loss = tf.reduce_mean(cross_entropy * advantage)

            # Value network loss.
            critic_loss = 0.5 * tf.square(tf.stop_gradient(target) - value[0])
            critic_loss = tf.reduce_mean(critic_loss)

            # Combine into a single loss.
            loss = 0.2 * actor_loss + critic_loss

        # Update the model in the direction that reduces the loss.
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
        return np.array(loss)
class DeepSARSAgent:
    def __init__(self, state_size, action_size):
        # Define state and action sizes.
        self.state_size = state_size
        self.action_size = action_size

        # Deep SARSA hyperparameters.
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.
        self.epsilon_decay = .9999
        self.epsilon_min = 0.01
        self.model = DeepSARSA(self.action_size)
        self.optimizer = Adam(learning_rate=self.learning_rate)

    # Select an action with an epsilon-greedy policy.
    def get_action(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_values = self.model(state)
            return np.argmax(q_values[0])

    # Update the model from a <s, a, r, s', a'> sample.
    def train_model(self, state, action, reward, next_state, next_action, done):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # Trainable parameters.
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            predict = self.model(state)[0]
            one_hot_action = tf.one_hot([action], self.action_size)
            predict = tf.reduce_sum(one_hot_action * predict, axis=1)

            # When done = True the episode has ended and there is no next state.
            next_q = self.model(next_state)[0][next_action]
            target = reward + (1 - done) * self.discount_factor * next_q

            # Mean squared error loss.
            loss = tf.reduce_mean(tf.square(target - predict))

        # Update the model in the direction that reduces the loss.
        grads = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(grads, model_params))
class DeepSARSA_Agent:
    def __init__(self, step_size, action_size):
        # Define action_size, step_size
        self.step_size = step_size
        self.action_size = action_size

        # DeepSARSA hyper-parameters
        self.discount_factor = 0.99
        self.learning_rate = 0.001
        self.epsilon = 1.
        self.epsilon_decay = 0.9999
        self.epsilon_min = 0.1
        self.model = DeepSARSA(self.action_size)
        self.optimizer = Adam(self.learning_rate)

    # choose action based on epsilon-greedy policy
    def get_action(self, state):
        # rand() (uniform on [0, 1)), not randn(): a normal sample can be
        # negative, which would distort the exploration probability.
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        else:
            q_value = self.model(state)  # q_value.shape = (1, action_size)
            return np.argmax(q_value[0])

    # update model from <s,a,r,s',a'>
    def train_model(self, state, action, reward, next_state, next_action, DONE):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        # train parameters
        model_params = self.model.trainable_variables
        with tf.GradientTape() as tape:
            tape.watch(model_params)
            predict = self.model(state)[0]
            one_hot_action = tf.one_hot([action], self.action_size)
            predict = tf.reduce_sum(one_hot_action * predict, axis=1)

            # if DONE = True there is no next_state/next_action to bootstrap from
            next_q = self.model(next_state)[0][next_action]
            target = reward + (1 - DONE) * self.discount_factor * next_q

            # Calculate MSE loss function
            loss = tf.reduce_mean(tf.square(target - predict))

        # model update
        gradients = tape.gradient(loss, model_params)
        self.optimizer.apply_gradients(zip(gradients, model_params))
class Generator(tf.keras.Model):
    def __init__(self, random_noise_size=100):
        super().__init__(name='generator')
        # Layers. UpSampling2D + Conv2D replaces the Conv2DTranspose stack
        # (kept in comments) to avoid checkerboard artifacts.
        self.dense_1 = Dense(7 * 7 * 256, use_bias=False,
                             input_shape=(random_noise_size,))
        self.batchNorm1 = BatchNormalization()
        self.leaky_1 = LeakyReLU()
        self.reshape_1 = Reshape((7, 7, 256))

        # self.conv2 = Conv2DTranspose(128, (5, 5), strides=(1, 1), padding="same", use_bias=False)
        self.up_2 = UpSampling2D((1, 1), interpolation='nearest')
        self.conv2 = Conv2D(128, (3, 3), strides=(1, 1), padding="same", use_bias=False)
        self.batchNorm2 = BatchNormalization()
        self.leaky_2 = LeakyReLU()

        # self.conv3 = Conv2DTranspose(64, (5, 5), strides=(2, 2), padding="same", use_bias=False)
        self.up_3 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", use_bias=False)
        self.batchNorm3 = BatchNormalization()
        self.leaky_3 = LeakyReLU()

        # self.conv4 = Conv2DTranspose(1, (5, 5), strides=(2, 2), padding="same", use_bias=False, activation="tanh")
        self.up_4 = UpSampling2D((2, 2), interpolation='nearest')
        self.conv4 = Conv2D(1, (3, 3), strides=(1, 1), padding="same", use_bias=False)

        self.optimizer = Adam(1e-4)

    def call(self, input_tensor):
        # Definition of the forward pass
        x = self.reshape_1(self.leaky_1(self.batchNorm1(self.dense_1(input_tensor))))
        x = self.leaky_2(self.batchNorm2(self.conv2(self.up_2(x))))
        x = self.leaky_3(self.batchNorm3(self.conv3(self.up_3(x))))
        return self.conv4(self.up_4(x))

    def generate_noise(self, batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def objective(self, dx_of_gx):
        # Labels are all ones because the generator is rewarded when the
        # discriminator takes its images for real ones.
        cross_entropy = tf.keras.losses.BinaryCrossentropy(from_logits=True)
        return cross_entropy(tf.ones_like(dx_of_gx), dx_of_gx)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
def train(text_vectors, images, model, epochs, batch_size=128, lr=1e-4):
    loss_fn = tf.keras.losses.binary_crossentropy
    g_optimizer = Adam(lr)
    d_optimizer = Adam(lr)

    # Rescale images to [-1, 1].
    images = images / 127.5 - 1.

    for epoch in range(epochs):
        dataset = tf.data.Dataset.from_tensor_slices((images, text_vectors))
        dataset = dataset.shuffle(buffer_size=100)
        dataset = dataset.batch(batch_size)
        for data in dataset:
            # Mismatched captions serve as negative examples.
            fake_captions = derangement(data[1])
            with tf.GradientTape() as g_tape, tf.GradientTape() as d_tape:
                fake_image_pred, real_image_pred, fake_caption_pred = model(
                    data, fake_captions)
                fake_image_loss = loss_fn(tf.zeros_like(fake_image_pred), fake_image_pred)
                real_image_loss = loss_fn(tf.ones_like(real_image_pred), real_image_pred)
                fake_caption_loss = loss_fn(tf.zeros_like(fake_caption_pred), fake_caption_pred)
                d_loss = (fake_image_loss + real_image_loss + fake_caption_loss) / 3
                g_loss = loss_fn(tf.ones_like(fake_image_pred), fake_image_pred)
            g_trainable_variables = (model.text_encoder.trainable_variables
                                     + model.generator.trainable_variables)
            # Differentiate each loss on its own tape: the generator loss on
            # g_tape, the discriminator loss on d_tape.
            g_grads = g_tape.gradient(g_loss, g_trainable_variables)
            d_grads = d_tape.gradient(d_loss, model.discriminator.trainable_variables)
            g_optimizer.apply_gradients(zip(g_grads, g_trainable_variables))
            d_optimizer.apply_gradients(
                zip(d_grads, model.discriminator.trainable_variables))

        # Plot the progress (losses are per-sample vectors, so report means).
        print("%d [D loss: %f] [G loss: %f]"
              % (epoch, float(tf.reduce_mean(d_loss)), float(tf.reduce_mean(g_loss))))
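# `derangement` is used above but not defined in this snippet. A simple
# sketch that mismatches captions by rolling the batch by one position, so
# every caption moves off its own image (for batches larger than one):
def derangement(captions):
    return tf.roll(captions, shift=1, axis=0)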
def train(model, content_ds, style_ds, loss, n_epochs=10, save_path=None):
    save_interval = 100
    optimizer = Adam(learning_rate=1e-4, decay=5e-5)
    n_batches = len(content_ds) // content_ds.batch_size
    process = psutil.Process(os.getpid())
    alpha = 1.0
    for e in range(1, n_epochs + 1):
        losses = {"total": 0.0, "content": 0.0, "style": 0.0, "color": 0.0}
        pbar = tqdm(total=n_batches, ncols=50)
        for i in range(n_batches):
            # Get batch
            content, style = content_ds.get_batch(), style_ds.get_batch()
            if content is None or style is None:
                break
            # Train on batch. loss_values holds:
            # total_loss, content_loss, weighted_style_loss, weighted_color_loss
            with tf.GradientTape() as tape:
                prediction = model([content, style, alpha])
                loss_values = loss([content, style], prediction)
            grads = tape.gradient(loss_values[0], model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            # Running average of each loss component.
            for key, lss in zip(losses.keys(), loss_values):
                losses[key] = (losses[key] * i + lss) / (i + 1)
            string = "".join(f"{key} loss: {value:.3f}\t"
                             for key, value in losses.items())
            pbar.set_description(f"Epoch {e}/{n_epochs}\t" + string
                                 + f"memory: {process.memory_info().rss}\t")
            pbar.update(1)
            if i % save_interval == 0 and save_path:
                model.save(save_path)
        time = datetime.datetime.now()
        print(time.date(), time.hour, time.minute)
        # Forward slashes keep the path portable and avoid backslash escapes.
        model.save(f'saved/models/epoch{e}_{time.date()}_{time.hour}_{time.minute}.h5')
class Critic(tf.keras.Model):
    def __init__(self):
        super().__init__(name="critic")
        init = RandomNormal(stddev=0.2)

        # Layers
        self.conv_1 = SpectralNormalization(
            Conv2D(64, (3, 3), strides=(2, 2), padding='same',
                   kernel_initializer=init, input_shape=[28, 28, 1]))
        self.leaky_1 = LeakyReLU(alpha=0.2)
        self.dropout_1 = Dropout(0.3)

        self.conv_2 = SpectralNormalization(
            Conv2D(128, (3, 3), strides=(2, 2), padding='same',
                   kernel_initializer=init))
        self.leaky_2 = LeakyReLU(alpha=0.2)
        self.dropout_2 = Dropout(0.3)

        self.flat = Flatten()
        self.logits = Dense(1)  # This neuron tells us if the input is fake or real
        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)

    def call(self, input_tensor):
        # Definition of the forward pass
        x = self.dropout_1(self.leaky_1(self.conv_1(input_tensor)))
        x = self.dropout_2(self.leaky_2(self.conv_2(x)))
        x = self.flat(x)
        return self.logits(x)

    def compute_loss(self, y_true, y_pred):
        """Wasserstein loss."""
        return backend.mean(y_true * y_pred)

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))
class Collection_Generator(tf.keras.Model):
    def __init__(self, generators):
        super().__init__(name="Coll_Generator")
        self.gens = generators
        self.optimizer = Adam(learning_rate=0.0001, beta_1=0, beta_2=0.9)
        self.n = 0

    def update_current_n_layer(self):
        self.n += 1

    def start_fading(self, n):
        self.gens[n].activate_fade_in()
        self.update_current_n_layer()

    def stop_fading(self, n):
        self.gens[n].disactivate_fade_in()

    def call(self, input_tensor):
        # Run only the currently active generator stages.
        x = input_tensor
        for i in range(self.n + 1):
            x = self.gens[i](x)
        return x

    def set_seed(self):
        self.seed = tf.random.normal([16, 100])
        data_access.store_seed_in_file('seed', self.seed)

    def load_seed(self):
        self.seed = data_access.load_seed_from_file('seed')

    def generate_noise(self, batch_size, random_noise_size):
        return tf.random.normal([batch_size, random_noise_size])

    def backPropagate(self, gradients, trainable_variables):
        self.optimizer.apply_gradients(zip(gradients, trainable_variables))

    def compute_loss(self, y_true, y_pred, class_wanted, class_prediction):
        """Wasserstein loss (the classifier cross-entropy term is disabled)."""
        k = 10  # hyper-parameter
        return backend.mean(y_true * y_pred)
        # + (k * categorical_crossentropy(class_wanted, class_prediction))
class IntrinsicCuriosityModule:
    def __init__(self, state_shape, action_num, latent_shape, alpha=1e-4, beta=0.2):
        self.icm = get_intrinsic_curiosity_module(state_shape, action_num, latent_shape)
        self.beta = beta
        self.optimizer = Adam(learning_rate=alpha)
        # Rolling buffers, cleared after each learning step.
        self.states = []
        self.actions = []
        self.next_states = []

    def learn(self, states, actions, next_states):
        with tf.GradientTape() as tape:
            forward_losses, pred_actions = self(states, actions, next_states)
            forward_loss = K.mean(forward_losses)
            # MeanSquaredError is a class; instantiate it before calling.
            inv_loss = MeanSquaredError()(actions, pred_actions)
            loss = self.beta * forward_loss + (1 - self.beta) * inv_loss
        grads = tape.gradient(loss, self.icm.trainable_weights)
        self.optimizer.apply_gradients(zip(grads, self.icm.trainable_weights))
        self.states.clear()
        self.actions.clear()
        self.next_states.clear()
        return loss

    def __call__(self, state, action, next_state):
        # Expected shapes:
        #   state:      [None, state_shape]
        #   action:     [None, action]
        #   next_state: [None, state_shape]
        forward_loss, pred_actions = self.icm(state, action, next_state)
        return forward_loss, pred_actions

    def save_module(self, state_features_filepath, forward_model_filepath,
                    inverse_model_filepath):
        pass

    @staticmethod
    def load_module(state_features_filepath, forward_model_filepath,
                    inverse_model_filepath):
        pass
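# Usage sketch: in an ICM setup the intrinsic reward is the forward-model
# prediction error, added to the environment reward. `states`, `actions`, and
# `next_states` are hypothetical batches, and the 0.5 scale on the squared
# error is a common choice rather than anything this module prescribes.
icm = IntrinsicCuriosityModule(state_shape=(4,), action_num=2, latent_shape=(16,))
forward_losses, _ = icm(states, actions, next_states)
intrinsic_reward = 0.5 * forward_losses  # one curiosity bonus per transition
loss = icm.learn(states, actions, next_states)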
def __init__(self, num_nets, state_dim, action_dim, learning_rate):
    """
    :param num_nets: number of networks in the ensemble
    :param state_dim: state dimension
    :param action_dim: action dimension
    :param learning_rate: optimizer learning rate
    """
    self.sess = tf.Session()
    self.num_nets = num_nets
    self.state_dim = state_dim
    self.action_dim = action_dim
    self.learning_rate = learning_rate
    K.set_session(self.sess)

    # Log variance bounds
    self.max_logvar = tf.Variable(-3 * np.ones([1, self.state_dim]), dtype=tf.float32)
    self.min_logvar = tf.Variable(-7 * np.ones([1, self.state_dim]), dtype=tf.float32)

    # Define ops for model output and optimization
    self.inputs = list()
    self.losses = list()
    self.means = list()
    self.logvars = list()
    self.models = list()
    self.outputs = list()
    self.targets = list()
    self.optimizations = list()

    # Use a throwaway loop variable: the old code shadowed it with the
    # network returned by create_network().
    for _ in range(self.num_nets):
        model, inp = self.create_network()
        self.inputs.append(inp)
        self.models.append(model)
        output = self.get_output(model.output)
        mean, logvar = output
        self.means.append(mean)
        self.logvars.append(logvar)
        self.outputs.append(output)

        target = tf.placeholder(tf.float32, shape=(None, self.state_dim))
        self.targets.append(target)
        var = tf.exp(logvar)
        inv_var = tf.divide(1, var)
        norm_output = mean - target
        # Calculate loss: Mahalanobis distance + log(det(cov))
        loss = tf.multiply(tf.multiply(norm_output, inv_var), norm_output)
        loss = tf.reduce_sum(loss, axis=1)
        loss += tf.math.log(tf.math.reduce_prod(var, axis=1))
        self.losses.append(loss)

        optimizer = Adam(lr=learning_rate)
        weights = model.trainable_weights
        gradients = tf.gradients(loss, weights)
        optimize = optimizer.apply_gradients(zip(gradients, weights))
        self.optimizations.append(optimize)

    # initialize_all_variables() is long deprecated in TF1.
    self.sess.run(tf.global_variables_initializer())
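# The loss above is (up to additive constants) the negative log-likelihood of
# a diagonal Gaussian: for predicted mean mu and variance sigma^2,
#   -log p(target) ∝ sum_i (mu_i - target_i)^2 / sigma_i^2 + sum_i log sigma_i^2
# The first sum is the Mahalanobis term computed with inv_var; the second
# equals log(prod_i sigma_i^2), the log-determinant term added at the end.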