def custom_loss(y_true, y_pred):
    # Note: this loss closes over graph tensors (input_fifth, reward, done,
    # z_mean, z_log_var) rather than using the y_true/y_pred arguments.
    # MeanSquaredError behaves differently from mse due to their inconsistent
    # usage of K.mean: the __call__ method of the MeanSquaredError and
    # BinaryCrossentropy classes returns a scalar, which coincides with the
    # return type expected of custom_loss, i.e. a tensor of shape ().
    image_loss = MeanSquaredError()(y_true=input_fifth, y_pred=predicted_img)  # scalar
    reward_loss = MeanSquaredError()(y_true=reward, y_pred=predicted_reward)  # scalar
    done_loss = BinaryCrossentropy()(y_true=done, y_pred=predicted_done)  # scalar
    # latent_loss scales with latent_dim, so it is divided by latent_dim below.
    latent_loss = 1 - K.square(z_mean) - K.exp(z_log_var) + z_log_var  # (?, latent_dim)
    latent_loss = K.sum(latent_loss, axis=-1)  # sum along the last axis --> (?,)
    latent_loss *= -0.5
    # Make latent_loss independent of latent_dim, which may vary as a hyper-parameter.
    latent_loss /= latent_dim
    latent_loss = K.mean(latent_loss)  # take the mean over the batch --> scalar
    overall_loss = \
        loss_weight['image_loss'] * image_loss + \
        loss_weight['reward_loss'] * reward_loss + \
        loss_weight['done_loss'] * done_loss + \
        loss_weight['latent_loss'] * latent_loss
    return overall_loss
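Because custom_loss closes over graph tensors, it cannot be run standalone, but the shape claim in its comments is easy to verify. A minimal check (assuming TF 2.x) that the MeanSquaredError class reduces to a scalar while the backend composition stays per-sample:

import tensorflow as tf
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras import backend as K

y_true = tf.random.normal((4, 8))
y_pred = tf.random.normal((4, 8))
print(MeanSquaredError()(y_true, y_pred).shape)          # () -- a scalar tensor
print(K.mean(K.square(y_true - y_pred), axis=-1).shape)  # (4,) -- one value per sample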
def train_step_branch(inputs):
    with tf.GradientTape(persistent=False) as tape:
        predicted_img, predicted_reward, predicted_done = vae(inputs, training=True)
        _reconstruction_loss = MeanSquaredError()(predicted_img, vae.input_fifth_var)
        _reward_loss = MeanSquaredError()(predicted_reward, vae.reward_var)
        _kl_loss = -0.5 * (K.sum(
            1 - K.square(vae.z_mean_var) - K.exp(vae.z_log_var_var) + vae.z_log_var_var,
            axis=-1))
        # _done_loss = BinaryCrossentropy()(predicted_done, vae.done_var)
        # bough_loss = loss_weight['image_loss'] * _reconstruction_loss + \
        #              loss_weight['latent_loss'] * _kl_loss
        branch_loss = _reward_loss
        # total_loss = loss_weight['image_loss'] * _reconstruction_loss + \
        #              loss_weight['latent_loss'] * _kl_loss + \
        #              loss_weight['reward_loss'] * _reward_loss
        #              # + loss_weight['done_loss'] * _done_loss
    reward_trainable_variables = vae.dr1.trainable_variables + vae.dr2.trainable_variables + \
                                 vae.dr3.trainable_variables + vae.dr4.trainable_variables + \
                                 vae.dreward.trainable_variables
    gradients_reward = tape.gradient(branch_loss, reward_trainable_variables)
    optimizer.apply_gradients(zip(gradients_reward, reward_trainable_variables))
    train_loss(branch_loss)
    # train_loss(total_loss)
    train_reward_loss(_reward_loss)
    train_reconstruction_loss(_reconstruction_loss)
    train_kl_loss(_kl_loss)
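train_step_branch trains only the reward head by omitting every other variable from the tape.gradient call. A self-contained sketch of that pattern (trunk and head are stand-in names, not from the original code):

import tensorflow as tf

trunk = tf.keras.layers.Dense(8)  # shared trunk, untouched because its variables are omitted
head = tf.keras.layers.Dense(1)   # branch head, the only part updated
optimizer = tf.keras.optimizers.Adam(1e-3)

x = tf.random.normal((16, 4))
y = tf.random.normal((16, 1))
with tf.GradientTape() as tape:
    branch_loss = tf.keras.losses.MeanSquaredError()(y, head(trunk(x)))
branch_vars = head.trainable_variables  # analogous to reward_trainable_variables
grads = tape.gradient(branch_loss, branch_vars)
optimizer.apply_gradients(zip(grads, branch_vars))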
def test_step(inputs, train_branch=False):
    predicted_img, predicted_reward, predicted_done = vae(inputs, training=False)
    _reconstruction_loss = MeanSquaredError()(predicted_img, vae.input_fifth_var)
    _reward_loss = MeanSquaredError()(predicted_reward, vae.reward_var)
    _kl_loss = -0.5 * (K.sum(
        1 - K.square(vae.z_mean_var) - K.exp(vae.z_log_var_var) + vae.z_log_var_var,
        axis=-1))
    # _done_loss = BinaryCrossentropy()(predicted_done, vae.done_var)
    # total_loss = loss_weight['image_loss'] * _reconstruction_loss + \
    #              loss_weight['latent_loss'] * _kl_loss + \
    #              loss_weight['reward_loss'] * _reward_loss  # + \
    #              # loss_weight['done_loss'] * _done_loss
    bough_loss = loss_weight['image_loss'] * _reconstruction_loss + \
                 loss_weight['latent_loss'] * _kl_loss
    branch_loss = _reward_loss
    if train_branch:
        test_loss(branch_loss)
    else:
        test_loss(bough_loss)
    # test_loss(total_loss)
    test_reconstruction_loss(_reconstruction_loss)
    test_reward_loss(_reward_loss)
    test_kl_loss(_kl_loss)
def __init__(self, input_shape=None, action_num=None, alpha=1e-4, beta=5e-4,
             gamma=0.99, eta=50, icm_alpha=1e-4, icm_beta=0.2,
             entropy_coef=0.1, entropy_decay=0.99, actor_loss_epsilon=0.2,
             actor_file=None, critic_file=None, icm_file=None, training=True):
    if actor_file is None:
        if input_shape is None or action_num is None:
            raise Exception(
                'input_shape and action_num are required when no actor file is specified.'
            )
        self.actor = get_actor(input_shape, action_num, [256, 256])
    else:
        self.actor = load_model(actor_file)
    if training:
        self.input_shape = input_shape
        self.action_num = action_num
        self.experiences = []
        if icm_file is None:
            self.icm = get_intrinsic_curiosity_module(input_shape, action_num, 64)
        else:
            self.icm = load_model(icm_file)
        self.gamma = gamma
        self.eta = eta
        self.icm_beta = icm_beta
        self.entropy_coef = entropy_coef
        self.entropy_decay = entropy_decay
        self.actor_loss_epsilon = actor_loss_epsilon
        self.actor_optimizer = Adam(learning_rate=alpha)
        self.critic_optimizer = Adam(learning_rate=beta)
        self.icm_optimizer = Adam(learning_rate=icm_alpha)
        self.critic_loss_func = MeanSquaredError()
        self.icm_loss_func = MeanSquaredError()
        if critic_file is None:
            if input_shape is None:
                raise Exception(
                    'input_shape is required when no critic file is specified.'
                )
            self.critic = get_critic(input_shape, [256, 256])
        else:
            self.critic = load_model(critic_file)
        self.prev_actor = clone_model(self.actor)
        self.prev_actor.set_weights(self.actor.get_weights())
def __init__(self, content_path, style_path, batch_size, model_path, debug,
             validate_content, validate_style, style_weight, content_weight,
             reflect_padding, num_epochs, learning_rate, lr_decay):
    self.style_weight = style_weight
    self.content_weight = content_weight
    self.num_epochs = num_epochs
    self.learning_schedule = InverseTimeDecay(
        initial_learning_rate=learning_rate,
        decay_steps=1,
        decay_rate=lr_decay)
    self.optimizer = Adam(learning_rate=self.learning_schedule, beta_1=0.9)
    self.mse = MeanSquaredError()
    self.model_path = model_path
    self.debug = debug
    self.build_model_ckpt(reflect_padding)
    self.content_dataset = self.create_dataset(content_path, batch_size)
    self.style_dataset = self.create_dataset(style_path, batch_size)
    if debug:
        self.create_summary_writer()
        self.create_metrics()
    self.validate_content = load_image(validate_content, training=False)
    self.validate_style = load_image(validate_style, training=False)
    self.validate_content = vgg_preprocess(self.validate_content)
    self.validate_style = vgg_preprocess(self.validate_style)
    create_dir('./results')
def _create_model(self, filters, kernel_size):
    from tensorflow.keras.losses import (
        MeanSquaredError, CategoricalCrossentropy
    )
    from tensorflow.keras.models import Model
    from tensorflow.keras.layers import Input
    from tensorflow.keras.optimizers import SGD

    input = Input(shape=self.input_dim)
    nn = self._create_convolutional_layer(input, filters, kernel_size)
    for i in range(self.residual_layers):
        nn = self._create_residual_layer(nn, filters, kernel_size)
    value_head = self._create_value_head(nn, filters)
    policy_head = self._create_policy_head(nn, filters)
    model = Model(inputs=[input], outputs=[value_head, policy_head])
    model.compile(optimizer=SGD(learning_rate=self.learning_rate),
                  loss={
                      'value_head': MeanSquaredError(),
                      'policy_head': CategoricalCrossentropy()
                  },
                  loss_weights={'value_head': 0.5, 'policy_head': 0.5})
    return model
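When compiling a two-headed model with a per-output loss dict, the dict keys must match the output layers' names. A minimal self-contained illustration of the same pattern (the layer shapes here are placeholders, not values from the original model):

import tensorflow as tf
from tensorflow.keras.losses import MeanSquaredError, CategoricalCrossentropy
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

inp = Input(shape=(8,))
trunk = Dense(16, activation='relu')(inp)
value_head = Dense(1, activation='tanh', name='value_head')(trunk)
policy_head = Dense(4, activation='softmax', name='policy_head')(trunk)
model = Model(inputs=[inp], outputs=[value_head, policy_head])
model.compile(optimizer='sgd',
              loss={'value_head': MeanSquaredError(),
                    'policy_head': CategoricalCrossentropy()},
              loss_weights={'value_head': 0.5, 'policy_head': 0.5})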
def __init__(self):
    self.classif = Sequential()
    self.classif.add(Dense(20, activation='relu',
                           kernel_initializer='random_normal', input_dim=1))
    # self.classif.add(Activation(custom_activation, name='SpecialActivation'))
    self.classif.add(Dense(20, activation='relu',
                           kernel_initializer='random_normal', input_dim=20))
    self.classif.add(Dense(20, activation='sigmoid',
                           kernel_initializer='random_normal', input_dim=20))
    self.classif.add(Dense(1, activation='sigmoid',
                           kernel_initializer='random_normal', input_dim=20))
    self.classif.compile(optimizer='Nadam', loss=MeanSquaredError(),
                         metrics=["mean_squared_error"])
def __init__(self, input_shape=None, action_num=None, alpha=0.0001, beta=0.0005,
             gamma=0.99, entropy_coef=1e-3, entropy_decay=0.99,
             actor_file=None, critic_file=None, training=True):
    if actor_file is None:
        if input_shape is None or action_num is None:
            raise Exception('input_shape and action_num are required when no actor file is specified')
        self.actor = self._get_actor(input_shape, action_num, [64])
    else:
        self.actor = load_model(actor_file)
    if training:
        self.states = []
        self.actions = []
        self.rewards = []
        self.next_states = []
        self.dones = []
        self.gamma = gamma
        self.entropy_coef = entropy_coef
        self.entropy_decay = entropy_decay
        self.actor_optimizer = Adam(learning_rate=alpha)
        self.critic_optimizer = Adam(learning_rate=beta)
        self.critic_loss_func = MeanSquaredError()
        if critic_file is None:
            if input_shape is None:
                raise Exception('input_shape is required when no critic file is specified')
            self.critic = self._get_critic(input_shape, [64])
        else:
            self.critic = load_model(critic_file)
def __init__(self, input_shape=None, action_num=None, alpha=0.0001, beta=0.0005,
             gamma=0.99, eta=10, entropy_coef=0.1, entropy_decay=0.99,
             actor_loss_epsilon=0.2, actor_file=None, critic_file=None, training=True):
    if actor_file is None:
        if input_shape is None or action_num is None:
            raise Exception('input_shape and action_num are required when no actor file is specified.')
        self.actor = self._get_actor(input_shape, action_num, [64])
    else:
        self.actor = load_model(actor_file)
    if training:
        self.grad_tape = tf.GradientTape(persistent=True)
        self.experiences = []
        self.gamma = gamma
        self.eta = eta
        self.entropy_coef = entropy_coef
        self.entropy_decay = entropy_decay
        self.actor_loss_epsilon = actor_loss_epsilon
        self.actor_optimizer = Adam(learning_rate=alpha)
        self.critic_optimizer = Adam(learning_rate=beta)
        self.critic_loss_func = MeanSquaredError()
        # self.icm = IntrinsicCuriosityModule(
        #     input_shape,
        #     action_num,
        #     64
        # )
        if critic_file is None:
            if input_shape is None:
                raise Exception('input_shape is required when no critic file is specified.')
            self.critic = self._get_critic(input_shape, [64])
        else:
            self.critic = load_model(critic_file)
        self.prev_actor = clone_model(self.actor)
        self.prev_actor.set_weights(self.actor.get_weights())
def load_model(self, model, env_shape, action_shape, **kwargs):
    input_layer = Input(shape=env_shape)
    m = model(input_layer)
    output = Dense(action_shape, activation='linear')(m)
    self.model = Model(input_layer, output, name='dqn_model')
    # Create the loss. delta is the value at which the Huber loss
    # transitions from a quadratic function to a linear function.
    delta = kwargs.get('loss_delta', 1.0)
    loss_function = kwargs.get('loss_function', 'huber')
    if loss_function == 'huber':
        loss = Huber(delta=delta)
    elif loss_function == 'mse':
        loss = MeanSquaredError()
    else:
        loss = Huber(delta=delta)
    # Create the optimizer.
    LR = kwargs.get('LR', 0.001)                # learning rate
    LR_decay1 = kwargs.get('LR_decay1', 0.9)    # exponential decay rate for the 1st moment estimates
    LR_decay2 = kwargs.get('LR_decay2', 0.999)  # exponential decay rate for the 2nd moment estimates
    optimizer = Adam(learning_rate=LR, beta_1=LR_decay1, beta_2=LR_decay2)
    self.model.compile(optimizer=optimizer, loss=loss,
                       metrics=kwargs.get('metrics'))
    self.model.summary()
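A quick numeric check of the delta comment above: with delta=1.0 the Huber loss is quadratic for errors up to 1 and linear beyond, so it penalizes a large error far less than MSE does:

import tensorflow as tf
from tensorflow.keras.losses import Huber, MeanSquaredError

y_true = tf.constant([[0.0]])
y_pred = tf.constant([[3.0]])
print(MeanSquaredError()(y_true, y_pred).numpy())  # 9.0 == error**2
print(Huber(delta=1.0)(y_true, y_pred).numpy())    # 2.5 == delta * (|error| - delta / 2)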
def channelwise_loss(y_true, y_pred):
    total_loss = 0.
    weights = [.5, .2, .3]
    # weights = [1., 1., 1.]
    for ch in [0, 1, 2]:
        total_loss += weights[ch] * MeanSquaredError()(y_true[..., ch], y_pred[..., ch])
        '''
        total_loss += weights[ch] * BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(
            y_true[..., ch], y_pred[..., ch]
        )
        '''
    # The first channel is the nuclei.
    # Most of the pixels below intensity 600 are part of the background and correlate
    # with the cyto, so we concentrate on the >600 ground-truth pixels.
    # (600 ~ .1 after normalization)
    # nuclei_weight = .8
    # nuclei_thresh = .1
    # nuclei_weight_tensor = nuclei_weight * tf.cast(y_true[..., 0] > nuclei_thresh, tf.float32) + \
    #     (1. - tf.cast(y_true[..., 0] <= nuclei_thresh, tf.float32))
    '''
    nuclei_loss = BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)(
        y_true[..., 0], y_pred[..., 0]
    )
    '''
    # total_loss += tf.math.reduce_mean(nuclei_loss * nuclei_weight_tensor)
    # total_loss += tf.math.reduce_mean(nuclei_loss)
    return total_loss
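A smoke test for channelwise_loss on random 3-channel images (assumes TF 2.x eager mode); it should match the per-channel MSEs combined with the 0.5/0.2/0.3 weights:

import tensorflow as tf
from tensorflow.keras.losses import MeanSquaredError

y_true = tf.random.uniform((2, 32, 32, 3))
y_pred = tf.random.uniform((2, 32, 32, 3))
mse = MeanSquaredError()
manual = sum(w * mse(y_true[..., c], y_pred[..., c])
             for c, w in zip(range(3), [.5, .2, .3]))
print(channelwise_loss(y_true, y_pred).numpy(), manual.numpy())  # equal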
def create_CartPol_model_from_config(env_shape, action_shape, **kwargs):
    # Define layers
    input_layer = Input(shape=env_shape)
    d1 = Dense(24, activation='relu')(input_layer)
    d2 = Dense(24, activation='relu')(d1)
    output_layer = Dense(action_shape, activation='linear')(d2)
    model = Model(input_layer, output_layer, name='CartPol_model')
    # Create the loss. delta is the value at which the Huber loss
    # transitions from a quadratic function to a linear function.
    delta = kwargs.get('loss_delta', 1.0)
    # loss = Huber(delta=delta)
    loss = MeanSquaredError()
    # Create the optimizer.
    LR = kwargs.get('LR', 0.001)                # learning rate
    LR_decay1 = kwargs.get('LR_decay1', 0.9)    # exponential decay rate for the 1st moment estimates
    LR_decay2 = kwargs.get('LR_decay2', 0.999)  # exponential decay rate for the 2nd moment estimates
    optimizer = Adam(learning_rate=LR, beta_1=LR_decay1, beta_2=LR_decay2)
    model.compile(optimizer=optimizer, loss=loss, metrics=None)
    return model
def __init__(self, name='generator'):
    super(Generator, self).__init__(name)
    # TODO: understand better why to use the VGG convolution feature
    self.vgg_feature = VGGFeature().output_layer()
    # generator structure
    self.conv_1 = tf.keras.layers.Conv2D(
        64, kernel_size=9, strides=1, padding='same', name='conv_1',
        kernel_initializer=tf.random_normal_initializer(stddev=0.02))
    self.prelu_1 = tf.keras.layers.PReLU(
        alpha_initializer='zeros', shared_axes=[1, 2], name='prelu_1')
    self.res_blocks = [ResidualBlock(name=f'residual_{x}') for x in range(16)]
    self.conv_2 = tf.keras.layers.Conv2D(
        filters=64, kernel_size=3, strides=1, padding='same', name='conv_2',
        kernel_initializer=tf.random_normal_initializer(stddev=0.02))
    self.bn_1 = tf.keras.layers.BatchNormalization(momentum=0.8, name='bn_1')
    self.upsample_blocks = [UpsampleBlock(num_filters=256, name=f'upsample_{x}')
                            for x in range(2)]
    self.conv_3 = tf.keras.layers.Conv2D(
        filters=3, kernel_size=9, strides=1, padding='same', activation='tanh',
        name='conv_3', kernel_initializer=tf.random_normal_initializer(stddev=0.02))
    # losses
    self.binary_crossentropy = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    self.mse = MeanSquaredError()
def _build_model(self, num_layers, width):
    '''Create a model with num_layers hidden layers of width units each.

    Inputs
        - num_layers: integer, number of hidden layers
        - width: integer, number of units per layer
    Outputs
        - model: Keras model object
    '''
    # Input layer
    inputs = Input((self.input_dim,))
    # num_layers fully connected layers
    X = Dense(width, activation='relu')(inputs)
    for i in range(num_layers - 1):
        X = Dense(width, activation='relu')(X)
    # Output layer with output_dim units
    outputs = Dense(self.output_dim, activation='linear')(X)
    # Adam optimizer
    opt = Adam(learning_rate=self.learning_rate)
    # Model with mean squared error loss
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(loss=MeanSquaredError(), optimizer=opt)
    return model
def test_weights_clip(self):
    """WeightsClip should restrict values after optimizer updates."""
    constraint = WeightsClip(min_value=-0.001, max_value=0.001)
    initializer = Constant(1.0)
    layer = Dense(10,
                  kernel_constraint=constraint,
                  bias_constraint=constraint,
                  kernel_initializer=initializer,
                  bias_initializer=initializer)
    with tf.GradientTape() as tape:
        data = tf.random.normal(shape=(1, 5))
        pred = layer(data)
        true_lb = tf.ones_like(pred)
        loss = MeanSquaredError()(true_lb, pred)
    # before the update, weights should equal their initial values
    for weight in layer.weights:
        assert np.allclose(weight.numpy(), 1.0)
    optimizer = SGD(learning_rate=0.0001)
    grad = tape.gradient(loss, layer.trainable_variables)
    optimizer.apply_gradients(zip(grad, layer.trainable_variables))
    # after the update, weights should lie within the constraint range
    for weight in layer.weights:
        assert np.all(-0.001 <= weight.numpy())
        assert np.all(weight.numpy() <= 0.001)
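WeightsClip itself is defined elsewhere in the code under test. A plausible sketch of such a constraint (an assumption, not the original implementation) is tf.clip_by_value wrapped in the Keras Constraint interface:

import tensorflow as tf

class WeightsClip(tf.keras.constraints.Constraint):
    """Clip weights into [min_value, max_value] after each optimizer update."""

    def __init__(self, min_value=-1.0, max_value=1.0):
        self.min_value = min_value
        self.max_value = max_value

    def __call__(self, w):
        return tf.clip_by_value(w, self.min_value, self.max_value)

    def get_config(self):  # lets the constraint be serialized with the model
        return {'min_value': self.min_value, 'max_value': self.max_value}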
def __init__(self, conf):
    """Initialize the Pix2Pix model.

    Args:
        conf (dict): loaded configuration file.
    """
    super().__init__(conf)
    conf_generator = conf["nn_structure"]["generator"]
    self.G = create_generator(conf_generator, self.input_shape, 0)
    conf_discriminator = conf["nn_structure"]["discriminator"]
    self.D = create_discriminator(conf_discriminator, self.input_shape)
    self.disc_loss_function = (BinaryCrossentropy(from_logits=True)
                               if conf_discriminator["loss_function"] == "BCE"
                               else MeanSquaredError())
    # loss coefficients
    self.DM_loss_coeff = conf["loss_coeffs"]["DM_loss_coeff"]
    self.L1_loss_coeff = conf["loss_coeffs"]["L1_loss_coeff"]
    self.gan_loss_coeff = conf["loss_coeffs"]["gan_loss_coeff"]
    # Optimizers
    if self.ttur:
        self.D_optimizer = Adam(learning_rate=self.d_lr, beta_1=0.5)
        self.G_optimizer = Adam(learning_rate=self.g_lr, beta_1=0.5)
    else:
        self.D_optimizer = Adam(learning_rate=self.lr, beta_1=0.5)
        self.G_optimizer = Adam(learning_rate=self.lr, beta_1=0.5)
    self.disc_optimizers = [self.D_optimizer]
    self.generator_optimizers = [self.G_optimizer]
def train_step(self, images, low_res):
    with GradientTape() as gen_tape, GradientTape() as disc_tape:
        generated_images = self.generator(low_res, training=True)
        mse = MeanSquaredError()
        loss = mse(images, generated_images)
        if loss < self.min_loss and self.count > 200:
            gan_name = 'generator.h5'
            self.generator.save(gan_name)
            self.discriminator.save('discriminator.h5')
            self.min_loss = loss
        real_output = self.discriminator(images, training=True)
        fake_output = self.discriminator(generated_images, training=True)
        gen_loss = self.generator_loss(fake_output) + loss
        disc_loss = self.discriminator_loss(real_output, fake_output) + loss
        if self.count % 100 == 0:
            print(self.count, self.min_loss, gen_loss, disc_loss)
    gradients_of_generator = gen_tape.gradient(gen_loss, self.generator.trainable_variables)
    gradients_of_discriminator = disc_tape.gradient(disc_loss, self.discriminator.trainable_variables)
    self.generator_optimizer.apply_gradients(
        zip(gradients_of_generator, self.generator.trainable_variables))
    self.discriminator_optimizer.apply_gradients(
        zip(gradients_of_discriminator, self.discriminator.trainable_variables))
def train_step(self, agent, target_agent):
    if len(self.memory) < self.batch_size:
        return 0.
    minibatch = random.sample(self.memory, self.batch_size)
    state = tf.stack([minibatch[i][0][0] for i in range(self.batch_size)])
    action = [minibatch[i][1] for i in range(self.batch_size)]
    reward = [minibatch[i][2] for i in range(self.batch_size)]
    next_state = tf.stack([minibatch[i][3][0] for i in range(self.batch_size)])
    done = [minibatch[i][4] for i in range(self.batch_size)]
    q_values_t = agent.predict(state)
    # the online network selects the next action; the target network evaluates it
    q_values_t1 = tf.math.argmax(agent.predict(next_state), axis=-1)
    q_target_t1 = target_agent.predict(next_state)  # evaluated once for the whole batch
    for i in range(self.batch_size):
        if done[i]:
            q_values_t[i][action[i]] = reward[i]
        else:
            q_values_t[i][action[i]] = reward[i] + \
                self.gamma * q_target_t1[i][q_values_t1[i]]
    with tf.GradientTape() as tape:
        predictions = agent(state, training=True)
        loss = MeanSquaredError(reduction=tf.keras.losses.Reduction.SUM)(
            q_values_t, predictions)
    grads = tape.gradient(loss, agent.trainable_variables)
    self.optimizer.apply_gradients(zip(grads, agent.trainable_variables))
    return loss
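The loop above builds Double-DQN targets one sample at a time. The same targets can be formed in a single vectorized pass; a sketch, assuming the Q-value arrays are numpy arrays as returned by predict:

import numpy as np

def ddqn_targets(q_online_t, q_online_t1, q_target_t1, actions, rewards, dones, gamma):
    # action selected by the online net, value taken from the target net
    best_actions = np.argmax(q_online_t1, axis=-1)
    targets = np.array(q_online_t, copy=True)
    idx = np.arange(len(actions))
    not_done = 1.0 - np.asarray(dones, dtype=np.float32)
    targets[idx, actions] = rewards + gamma * q_target_t1[idx, best_actions] * not_done
    return targets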
def __init__(self, util: Utils, hr_size=96, log_dir: str = None, num_resblock: int = 16):
    self.vgg = self.vgg(20)
    self.learning_rate = 0.00005
    self.clipping = 0.01
    self.generator_optimizer = RMSprop(learning_rate=self.learning_rate,
                                       clipvalue=self.clipping)
    self.discriminator_optimizer = RMSprop(learning_rate=self.learning_rate,
                                           clipvalue=self.clipping)
    self.binary_cross_entropy = BinaryCrossentropy(from_logits=True)
    self.mean_squared_error = MeanSquaredError()
    self.util: Utils = util
    self.HR_SIZE = hr_size
    self.LR_SIZE = self.HR_SIZE // 4
    if log_dir is not None:
        self.summary_writer = tf.summary.create_file_writer(log_dir)
        if log_dir.startswith('../'):
            log_dir = log_dir[len('../'):]
        print('open tensorboard with: tensorboard --logdir ' + log_dir)
    else:
        self.summary_writer = None
    self.generator = make_generator_model(num_res_blocks=num_resblock)
    self.discriminator = make_discriminator_model(self.HR_SIZE)
    self.checkpoint = tf.train.Checkpoint(generator=self.generator,
                                          discriminator=self.discriminator)
def compile_model(self):
    optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
    loss = MeanSquaredError()
    self.model.compile(optimizer=optimizer, loss=loss)
def __init__(self,
             generator,
             discriminator,
             content_loss='VGG54',
             learning_rate=PiecewiseConstantDecay(boundaries=[100000],
                                                  values=[1e-4, 1e-5])):
    if content_loss == 'VGG22':
        self.vgg = srgan.vgg_22()
    elif content_loss == 'VGG54':
        self.vgg = srgan.vgg_54()
    else:
        raise ValueError("content_loss must be either 'VGG22' or 'VGG54'")
    self.content_loss = content_loss
    self.generator = generator
    self.discriminator = discriminator
    self.generator_optimizer = Adam(learning_rate=learning_rate)
    self.discriminator_optimizer = Adam(learning_rate=learning_rate)
    self.binary_cross_entropy = BinaryCrossentropy(from_logits=False)
    self.mean_squared_error = MeanSquaredError()
    log_param("Model", "Pre-trained SRGAN")
    log_param("Learning rate", learning_rate)
    log_param("Content loss", content_loss)
def __init__(self, encoder, decoder, img_size=64, z_dim=100, batch_size=16,
             n_epochs=51, shuffle=True):
    super(AE, self).__init__()
    # Set the random seed
    random_seed = 42
    tf.random.set_seed(random_seed)
    np.random.seed(random_seed)
    self.encoder = encoder
    self.decoder = decoder
    self.img_size = img_size
    self.z_dim = z_dim
    self.batch_size = batch_size
    self.n_epochs = n_epochs
    self.volume_size = 0
    self.shuffle = shuffle
    self.cross_entropy = BinaryCrossentropy(from_logits=True)
    self.mse = MeanSquaredError()
    self.accuracy = BinaryAccuracy()
    self.hist = {
        "ae_loss": [],
        "ae_acc": [],
        "dc_loss": [],
        "dc_acc": [],
        "gen_loss": []
    }
    self.trainer = None
def __init__(self, max_t, discount_factor, min_epsilon, min_learning_rate,
             learning_decay, test_sample, test_sample_min_avg, memory_buffer,
             random_exploration_steps):
    # game environment
    self.env = gym.make('CartPole-v0')
    # number of frames required to win the game
    self.max_t = max_t
    # discounts future anticipated reward
    self.discount_factor = discount_factor
    # minimum value of epsilon, the probability of choosing a random action
    self.min_epsilon = min_epsilon
    # minimum value of the learning rate
    self.min_learning_rate = min_learning_rate
    # decay constant of the learning rate over time
    self.learning_decay = learning_decay
    # number of episodes between target-network updates
    self.target_update_interval = 20
    # used to end training early when sufficient progress is made
    self.test_sample = test_sample
    self.test_sample_min_avg = test_sample_min_avg
    self.training_sample_size = 32
    self.random_exploration_steps = random_exploration_steps
    self.step_counter = 0
    # progress tracking
    self.episode_rewards = []
    self.q_sum = []
    self.criterion = MeanSquaredError()
    self.optimizer = Adam()
    # initialise models
    self.prediction_model = self.init_model()
    self.target_model = self.init_model()
    # initialise memory
    self.memory = deque(maxlen=memory_buffer)
def trainDBN(trainX):
    """Unsupervised layerwise training."""
    for i in range(1, DBN_NUM_LAYERS):
        model = Sequential()
        model.add(Dense(DBN_LAYERS_DIM[i],
                        input_dim=DBN_LAYERS_DIM[i - 1],
                        activation=LeakyReLU(alpha=0.3)))
        model.add(Dense(DBN_LAYERS_DIM[i - 1], activation='tanh'))
        model.compile(loss=MeanSquaredError(),
                      optimizer=Adam(learning_rate=INIT_LEARNING_RATE),
                      metrics=['accuracy'])
        model.summary()
        model.fit(trainX,
                  trainX,
                  epochs=EPOCHS,
                  batch_size=BATCH_SIZE,
                  callbacks=[LearningRateDecay(INIT_LEARNING_RATE, DECAY_FACTOR)],
                  shuffle=True)
        DBN_WEIGHTS.append(model.layers[0].get_weights())
        if i != DBN_NUM_LAYERS - 1:
            trainX = getNewTrainX(model, trainX, DBN_LAYERS_DIM[i])
    model = Sequential()
    model.add(Dense(DBN_LAYERS_DIM[1],
                    input_dim=DBN_LAYERS_DIM[0],
                    activation=LeakyReLU(alpha=0.3)))
    for i in range(2, DBN_NUM_LAYERS):
        model.add(Dense(DBN_LAYERS_DIM[i], activation=LeakyReLU(alpha=0.3)))
    for i, layer in enumerate(model.layers):
        layer.set_weights(DBN_WEIGHTS[i])
    print("\n-----------")
    print("FINAL MODEL")
    print("-----------\n")
    model.summary()
    print("---\n")
    model.save_weights(POS2VEC_WEIGHTS_PATH)
    with open(META_FILE, 'r') as f:
        meta = json.load(f)
    meta['pos2vec_trained'] = "True"
    meta['pos2vec_weights_path'] = POS2VEC_WEIGHTS_PATH
    with open(META_FILE, 'w') as f:
        json.dump(meta, f, indent=4)
    print("Pos2Vec training complete.")
    print("  Path:", POS2VEC_WEIGHTS_PATH)
    print("---")
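getNewTrainX is defined elsewhere in this project. A plausible sketch of what it might do (an assumption, not the original): run the freshly trained autoencoder's first, encoding Dense layer over trainX, so the next layer trains on the learned representation:

import tensorflow as tf

def getNewTrainX_sketch(model, trainX, dim):
    # hypothetical stand-in for getNewTrainX: encode trainX with the
    # autoencoder's first (encoding) layer
    encoder = tf.keras.Sequential([model.layers[0]])
    encoded = encoder.predict(trainX)
    assert encoded.shape[-1] == dim  # new feature width matches DBN_LAYERS_DIM[i]
    return encoded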
def init_nn(self, input_size, hidden_layers_dim):
    """Initializes the sequential neural model by adding layers and compiling
    the model. There is no call to fit(), because the eligibilities need to be
    applied to the gradients before the gradients can be used to update the
    model weights. This is done in split-gd."""
    # Adagrad is well suited to sparse data; Adadelta is an extension that
    # solves its problem of a shrinking learning rate.
    opt = Adadelta(learning_rate=self.learning_rate)
    # Larger errors should be penalized more than smaller ones.
    loss = MeanSquaredError()
    model = KER.models.Sequential()
    # The input layer expects a one-dimensional array with input_size elements;
    # this builds the network automatically.
    model.add(KER.layers.Dense(input_size, activation="relu",
                               input_shape=(input_size,)))
    for i in range(len(hidden_layers_dim)):
        # relu gives quick convergence
        model.add(KER.layers.Dense(hidden_layers_dim[i], activation="relu"))
    # Observation: no activation function gives quicker convergence (could use linear).
    model.add(KER.layers.Dense(1))
    # MSE is one of the most preferred metrics for regression tasks.
    model.compile(optimizer=opt, loss=loss, metrics=["mean_squared_error"])
    # model.summary()
    return model
def build_base_model(input_size):
    in_1 = Input(shape=(input_size,), name="input_1")
    in_2 = Input(shape=(input_size,), name="input_2")
    norm_1 = Lambda(lambda tensor: tf.norm(tensor, axis=1, keepdims=True),
                    name="norm_input_1")(in_1)
    norm_2 = Lambda(lambda tensor: tf.norm(tensor, axis=1, keepdims=True),
                    name="norm_input_2")(in_2)
    norm_mul = Multiply(name="multiply_norms")([norm_1, norm_2])
    model = Multiply(name="pointwise_multiply")([in_1, in_2])
    model = Lambda(lambda tensor: tf.reduce_sum(tensor, axis=1, keepdims=True),
                   name="sum")(model)
    model = Lambda(lambda tensors: tf.divide(tensors[0], tensors[1]),
                   name="divide")([model, norm_mul])
    model = ValueMinusInput(1, name="one_minus_input")(model)
    model = LessThan(0.4)(model)
    model_out = Lambda(lambda tensor: tf.cast(tensor, tf.float32), name="cast")(model)
    model = Model([in_1, in_2], model_out)
    model.compile(loss=MeanSquaredError(),
                  optimizer=SGD(),
                  metrics=[
                      BinaryAccuracy(),
                      Precision(),
                      Recall(),
                      TrueNegatives(),
                      FalsePositives(),
                      FalseNegatives(),
                      TruePositives()
                  ])
    return model
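ValueMinusInput and LessThan are custom layers from this repository. Plausible sketches based on how they are used above (assumptions, not the originals): the first computes value - input, so "one_minus_input" turns cosine similarity into cosine distance, and the second thresholds that distance:

import tensorflow as tf

class ValueMinusInput(tf.keras.layers.Layer):
    def __init__(self, value, **kwargs):
        super().__init__(**kwargs)
        self.value = value

    def call(self, inputs):
        return self.value - inputs  # e.g. 1 - cosine_similarity

class LessThan(tf.keras.layers.Layer):
    def __init__(self, threshold, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold

    def call(self, inputs):
        # boolean output; the model casts it to float32 downstream
        return tf.less(inputs, self.threshold)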
def pre_train(generator, train_dataset, valid_dataset, steps, evaluate_every=1, lr_rate=1e-4):
    loss_mean = Mean()
    pre_train_loss = MeanSquaredError()
    pre_train_optimizer = Adam(lr_rate)
    now = time.perf_counter()
    step = 0
    for lr, hr in train_dataset.take(steps):
        step = step + 1
        with tf.GradientTape() as tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)
            sr = generator(lr, training=True)
            loss_value = pre_train_loss(hr, sr)
        gradients = tape.gradient(loss_value, generator.trainable_variables)
        pre_train_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        loss_mean(loss_value)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            psnr_value = evaluate(generator, valid_dataset)
            duration = time.perf_counter() - now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, '
                  f'PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            now = time.perf_counter()
def G_loss(y_truth, G_pred, D_pred):
    """Generator loss: a pixelwise squared-error term plus an adversarial term."""
    G_mse_loss = MeanSquaredError()(y_truth, G_pred)
    G_bce_loss = BinaryCrossentropy()(tf.zeros_like(D_pred), D_pred)
    # print(tf.print(G_mse_loss))
    # print(tf.print(G_bce_loss))
    return G_mse_loss - 0.0003 * G_bce_loss
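The minus sign on the adversarial term may look inverted, but BinaryCrossentropy()(zeros, D_pred) equals -log(1 - D_pred) on average, so subtracting it rewards the generator as D_pred approaches 1. A quick numeric check (assumes TF 2.x):

import tensorflow as tf
from tensorflow.keras.losses import BinaryCrossentropy

bce = BinaryCrossentropy()
for p in (0.1, 0.5, 0.9):
    d_pred = tf.constant([[p]])
    adv_term = -0.0003 * bce(tf.zeros_like(d_pred), d_pred)
    print(p, adv_term.numpy())  # decreases as the discriminator scores the fake as real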
def mse_no_noise(self, y_true, y_pred) -> 'Loss':
    """MSE loss wrapper making sure the no_noise parameter is set.

    :param y_true: true output values
    :param y_pred: predicted output values
    :return: loss
    """
    assert self.no_noise, "squared_loss_no_noise loss is only allowed for no_noise=True"
    mse = MeanSquaredError()
    return mse(y_true, y_pred)
def depthwise_ed_model(input_shape,
                       n_outputs,
                       depth_mul=(4, 4),
                       drp=0.3,
                       krnl=((1, 3), (1, 3)),
                       dil=((1, 1), (1, 1)),
                       mpool=((0, 0), (0, 0)),
                       dense=(100, 50),
                       acts=('relu', 'relu'),
                       b_norm=False,
                       dense_drp=False,
                       pad='valid',
                       strides=((1, 1), (1, 1))):
    model = Sequential(name='Depthwise_model')
    if len(input_shape) < 3:
        # 2-D inputs get a leading singleton height so the (1, k) kernels apply
        model.add(Reshape((1, *input_shape), input_shape=input_shape))
    else:
        model.add(InputLayer(input_shape=input_shape))
    # encoder: stacked depthwise convolutions with optional max pooling
    for i in range(len(krnl)):
        model.add(DepthwiseConv2D(kernel_size=krnl[i],
                                  depth_multiplier=depth_mul[i],
                                  activation=acts[0],
                                  padding=pad,
                                  strides=strides[i],
                                  dilation_rate=dil[i]))
        if mpool[i][0]:
            model.add(MaxPooling2D(pool_size=mpool[i]))
    # decoder: upsampling and convolutions mirroring the pooling sizes,
    # applied only where pooling was enabled
    for i in range(len(krnl)):
        if mpool[i][0]:
            model.add(UpSampling2D(size=mpool[len(krnl) - 1 - i]))
            model.add(Conv2D(kernel_size=mpool[len(krnl) - 1 - i],
                             filters=n_outputs,
                             activation=acts[1],
                             padding=pad,
                             strides=strides[len(krnl) - 1 - i],
                             dilation_rate=dil[len(krnl) - 1 - i]))
    model.add(Dropout(drp))
    model.add(Flatten())
    if b_norm:
        for d in dense:
            model.add(Dense(d))
            model.add(BatchNormalization())
            model.add(Activation(acts[1]))
            if dense_drp:
                model.add(Dropout(drp))
    else:
        for d in dense:
            model.add(Dense(d, activation=acts[1]))
            if dense_drp:
                model.add(Dropout(drp))
    model.add(Dense(n_outputs, activation='linear'))
    model.compile(loss=MeanSquaredError(), optimizer=Adam(), metrics=['mape'])
    return model
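A hypothetical smoke test for depthwise_ed_model; the input shape and output count below are assumptions, not values from the original project. With a 2-D input the model inserts a leading singleton height, so the (1, 3) kernels convolve along the second axis:

import numpy as np

model = depthwise_ed_model(input_shape=(8, 64), n_outputs=3)  # assumed shapes
model.summary()
x = np.random.randn(4, 8, 64).astype('float32')
print(model.predict(x).shape)  # (4, 3)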