def train(self, train_ds, steps=200000):
    pls_metric = Mean()
    dls_metric = Mean()
    step = 0
    log_file = open(log_dir + 'losses.txt', 'w+')
    log_file.close()
    for lr, hr in train_ds.take(steps):
        step += 1
        pl, dl = self.train_step(lr, hr)
        pls_metric(pl)
        dls_metric(dl)
        if step % 50 == 0:
            print('{}/{}, perceptual loss = {:.4f}, discriminator loss = {:.4f}'.format(
                step, steps, pls_metric.result(), dls_metric.result()))
            # Update log file
            log_file = open(log_dir + 'losses.txt', 'a')
            log_file.write('{}/{}, perceptual loss = {:.4f}, discriminator loss = {:.4f}\n'.format(
                step, steps, pls_metric.result(), dls_metric.result()))
            log_file.close()
            # Restart metrics
            pls_metric.reset_states()
            dls_metric.reset_states()
def pre_train(generator, train_dataset, valid_dataset, steps, evaluate_every=1, lr_rate=1e-4):
    loss_mean = Mean()
    pre_train_loss = MeanSquaredError()
    pre_train_optimizer = Adam(lr_rate)
    now = time.perf_counter()
    step = 0
    for lr, hr in train_dataset.take(steps):
        step = step + 1
        with tf.GradientTape() as tape:
            lr = tf.cast(lr, tf.float32)
            hr = tf.cast(hr, tf.float32)
            sr = generator(lr, training=True)
            loss_value = pre_train_loss(hr, sr)
        gradients = tape.gradient(loss_value, generator.trainable_variables)
        pre_train_optimizer.apply_gradients(zip(gradients, generator.trainable_variables))
        loss_mean(loss_value)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            psnr_value = evaluate(generator, valid_dataset)
            duration = time.perf_counter() - now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            now = time.perf_counter()
def train(self, train_dataset, valid_dataset, steps, evaluate_every=1000, save_best_only=False):
    loss_mean = Mean()
    ckpt_mgr = self.checkpoint_manager
    ckpt = self.checkpoint
    self.now = time.perf_counter()
    for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
        ckpt.step.assign_add(1)
        step = ckpt.step.numpy()
        loss = self.train_step(lr, hr)
        loss_mean(loss)
        print("Currently in the train step ", step)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            # Compute PSNR on validation dataset
            psnr_value = self.evaluate(valid_dataset)
            duration = time.perf_counter() - self.now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            if save_best_only and psnr_value <= ckpt.psnr:
                # Skip saving the checkpoint: no PSNR improvement
                self.now = time.perf_counter()
                continue
            ckpt.psnr = psnr_value
            ckpt_mgr.save()
            self.now = time.perf_counter()
def train(model, train_dataset, test_dataset, epochs, optimizer):
    # statistics to store
    elbos = []
    ssims = []
    print('Starting training...')
    # iterate over all epochs
    for epoch in range(0, epochs + 1):
        # iterate over train_dataset containing training images
        for x_train in train_dataset:
            train_step(model, x_train, optimizer)
        # feed the network test samples to generate new images
        predictions = model.generate_images(model, test_dataset)
        # display the results
        try:
            display_result(predictions)
        except Exception:
            pass
        loss = Mean()
        for test_x in test_dataset:
            loss(calculate_loss(model, test_x))
        elbo = -loss.result()
        # evaluate the model using Structural Similarity between generated images and test samples, and ELBO
        ssim = calculate_ssim(predictions, test_dataset)
        print("> " + str(epoch) + ": SSIM=" + str(ssim) + ', ELBO=' + str(elbo))
        # add the evaluations to a list and plot the results later
        ssims.append(ssim)
        elbos.append(elbo)
    # return the trained model
    return model, elbos, ssims
def init_loss(self):
    self.loss_function = SparseCategoricalCrossentropy()
    self.train_loss = Mean(name="train_loss")
    self.train_accuracy = SparseCategoricalAccuracy(name="train_accuracy")
    self.test_loss = Mean(name="test_loss")
    self.test_accuracy = SparseCategoricalAccuracy(name="test_accuracy")
def main():
    # Safety checks
    if not os.path.exists(opt.ckptDir):
        os.makedirs(opt.ckptDir)
    if not os.path.exists(opt.logs):
        os.makedirs(opt.logs)

    logger.info('Building model...')
    model = WDSRConv3D(scale=3, numFilters=32, kernelSize=(3, 3, 3), numResBlocks=8,
                       expRate=8, decayRate=0.8, numImgLR=9, patchSizeLR=32, isGrayScale=True)

    if opt.optimizer == 'adam':
        optimizer = Adam(learning_rate=5e-4)
    elif opt.optimizer == 'nadam':
        # http://cs229.stanford.edu/proj2015/054_report.pdf
        optimizer = Nadam(learning_rate=5e-4)
    else:
        optimizer = SGD(learning_rate=5e-4)

    checkpoint = tf.train.Checkpoint(step=tf.Variable(0), psnr=tf.Variable(1.0),
                                     optimizer=optimizer, model=model)
    checkpointManager = tf.train.CheckpointManager(checkpoint=checkpoint,
                                                   directory=opt.ckptDir, max_to_keep=5)

    # Load data
    logger.info('Loading data...')
    patchLR = np.load(opt.patchLR, allow_pickle=True)
    patchHR = np.load(opt.patchHR, allow_pickle=True)
    X_train, X_val, y_train, y_val = train_test_split(patchLR, patchHR,
                                                      test_size=opt.split, random_state=17)
    valData = [X_val, y_val]

    # Initialize metrics
    trainLoss = Mean(name='trainLoss')
    trainPSNR = Mean(name='trainPSNR')
    testLoss = Mean(name='testLoss')
    testPSNR = Mean(name='testPSNR')

    fitTrainData(model, optimizer, [trainLoss, trainPSNR, testLoss, testPSNR],
                 shiftCompensatedL1Loss, shiftCompensatedcPSNR,
                 X_train, y_train, opt.batchSize, opt.epochs, opt.dataBufferSize,
                 valData, opt.valSteps, checkpoint, checkpointManager,
                 opt.logDir, opt.ckptDir, opt.saveBestOnly)
def on_epoch_begin(self):
    if self.epoch == 0:
        self.t0 = time.time()
    self.t1 = time.time()
    self.epoch += 1
    self.steps = 0
    self.steps_val = 0
    self.metrics = {n: Mean() for n in self.names}
    self.metrics_val = {n: Mean() for n in self.names}
def get_metric_objects():
    score_avg = Mean()
    max_q_avg = Mean()
    loss_avg = Mean()

    metric_objects = {
        'score_avg': score_avg,
        'max_q_avg': max_q_avg,
        'loss_avg': loss_avg
    }
    return metric_objects
def get_classification_metrics():
    train_loss = Mean()
    valid_loss = Mean()
    test_loss = Mean()

    metric_objects = dict()
    metric_objects['train_loss'] = train_loss
    metric_objects['valid_loss'] = valid_loss
    metric_objects['test_loss'] = test_loss
    return metric_objects
def load_metrics():
    global train_loss, train_acc
    global valid_loss, valid_acc
    global test_loss, test_acc

    train_loss = Mean()
    valid_loss = Mean()
    test_loss = Mean()

    train_acc = SparseCategoricalAccuracy()
    valid_acc = SparseCategoricalAccuracy()
    test_acc = SparseCategoricalAccuracy()
def _initiate_loss_tracking(self) -> None:
    """Initiates a dictionary containing all the losses tracked."""
    self.losses = {
        loss_name: Mean(loss_name, dtype=tf.float32)
        for loss_name in self.loss_names
    }
    self.timer = Mean("timer", dtype=tf.float32)
    self.start_time = time()
def __init__(self, config: MuZeroConfig, storage: SharedStorage, replay_buffer: ReplayBuffer):
    self.config = config
    self.storage = storage
    self.replay_buffer = replay_buffer
    self.summary = create_summary(name="leaner")
    self.metrics_loss = Mean('leaner-loss', dtype=tf.float32)
    self.network = Network(self.config)
    self.lr_schedule = ExponentialDecay(
        initial_learning_rate=self.config.lr_init,
        decay_steps=self.config.lr_decay_steps,
        decay_rate=self.config.lr_decay_rate)
    self.optimizer = Adam(learning_rate=self.lr_schedule)
def train(self, train_dataset, valid_dataset, save_best_only=False):
    loss_mean = Mean()
    ckpt_mgr = self.checkpoint_manager
    ckpt = self.checkpoint
    self.now = time.perf_counter()
    for lr, hr in train_dataset.take(self.args.num_iter - ckpt.step.numpy()):
        ckpt.step.assign_add(1)
        step = ckpt.step.numpy()
        loss = self.train_step(lr, hr)
        loss_mean(loss)
        loss_value = loss_mean.result()
        loss_mean.reset_states()
        lr_value = ckpt.optimizer._decayed_lr('float32').numpy()
        duration = time.perf_counter() - self.now
        self.now = time.perf_counter()
        if step % self.args.log_freq == 0:
            tf.summary.scalar('loss', loss_value, step=step)
            tf.summary.scalar('lr', lr_value, step=step)
        if step % self.args.print_freq == 0:
            print(f'{step}/{self.args.num_iter}: loss = {loss_value.numpy():.3f}, lr = {lr_value:.6f} ({duration:.2f}s)')
        if step % self.args.valid_freq == 0:
            psnr_value = self.evaluate(valid_dataset)
            ckpt.psnr = psnr_value
            tf.summary.scalar('psnr', psnr_value, step=step)
            print(f'{step}/{self.args.num_iter}: loss = {loss_value.numpy():.3f}, lr = {lr_value:.6f}, PSNR = {psnr_value.numpy():.3f}')
        if step % self.args.save_freq == 0:
            # save weights only
            save_path = self.ckpt_path + '/weights-' + str(step) + '.h5'
            self.checkpoint.model.save_weights(filepath=save_path, save_format='h5')
            # save ckpt (weights + other train status)
            ckpt_mgr.save(checkpoint_number=step)
def __init__(self, model, loss, metric, optimizer,
             checkpoint_dir='./ckpt/3dsrnet', log_dir='logs'):
    self.now = None
    self.loss = loss
    self.metric = metric
    self.log_dir = log_dir
    self.train_loss = Mean(name='train_loss')
    self.train_psnr = Mean(name='train_psnr')
    self.test_loss = Mean(name='test_loss')
    self.test_psnr = Mean(name='test_psnr')
    self.checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                          psnr=tf.Variable(1.0),
                                          optimizer=optimizer,
                                          model=model)
    self.checkpoint_manager = tf.train.CheckpointManager(
        checkpoint=self.checkpoint,
        directory=checkpoint_dir,
        max_to_keep=5)
    self.restore()
def __init__(self, model, loss, metric, optimizer, ckptDir, logDir,
             strategy, multiGPU=True, evalStep=10):
    # Safety checks
    if not os.path.exists(ckptDir):
        os.makedirs(ckptDir)
    if not os.path.exists(logDir):
        os.makedirs(logDir)

    self.ckpt = tf.train.Checkpoint(step=tf.Variable(0),
                                    psnr=tf.Variable(1.0),
                                    optimizer=optimizer,
                                    model=model)
    self.ckptMngr = tf.train.CheckpointManager(checkpoint=self.ckpt,
                                               directory=ckptDir,
                                               max_to_keep=5)
    self.loss = loss
    self.metric = metric
    self.logDir = logDir
    self.trainLoss = Mean(name='trainLoss')
    self.trainPSNR = Mean(name='trainPSNR')
    self.testLoss = Mean(name='testLoss')
    self.testPSNR = Mean(name='testPSNR')
    self.evalStep = evalStep
    self.multiGPU = multiGPU
    self.strategy = strategy
    self.restore()
def train(self, train_dataset, steps=200000):
    pls_metric = Mean()
    dls_metric = Mean()
    step = 0
    for lr, hr in train_dataset.take(steps):
        step += 1
        pl, dl = self.train_step(lr, hr)
        pls_metric(pl)
        dls_metric(dl)
        # if step % 1 == 0:
        if step % 50 == 0:
            print(f'{step}/{steps}, perceptual loss = {pls_metric.result():.4f}, '
                  f'discriminator loss = {dls_metric.result():.4f}')
            log_metric("GAN perceptual loss", float(f'{pls_metric.result():.4f}'))
            log_metric("GAN discriminator loss", float(f'{dls_metric.result():.4f}'))
            pls_metric.reset_states()
            dls_metric.reset_states()
def __init__(self, model, band, image_hr_size, name_net, loss, metric, optimizer,
             checkpoint_dir='./checkpoint', log_dir='logs'):
    self.now = None
    self.band = band
    self.name_net = name_net
    self.loss = loss
    self.image_hr_size = image_hr_size
    self.metric = metric
    self.log_dir = log_dir
    self.train_loss = Mean(name='train_loss')
    self.train_psnr = Mean(name='train_psnr')
    self.test_loss = Mean(name='test_loss')
    self.test_psnr = Mean(name='test_psnr')
    self.checkpoint = tf.train.Checkpoint(step=tf.Variable(0),
                                          psnr=tf.Variable(1.0),
                                          optimizer=optimizer,
                                          model=model)
    self.checkpoint_manager = tf.train.CheckpointManager(
        checkpoint=self.checkpoint,
        directory=checkpoint_dir,
        max_to_keep=3)
    self.restore()
def __init__(self, fbnet, input_shape, initial_temperature=5,
             temperature_decay_rate=0.956, temperature_decay_steps=1,
             latency_alpha=0.2, latency_beta=0.6,
             weight_lr=0.01, weight_momentum=0.9, weight_decay=1e-4,
             theta_lr=1e-3, theta_beta1=0.9, theta_beta2=0.999, theta_decay=5e-4):
    self._epoch = 0
    self.initial_temperature = initial_temperature
    self.temperature = initial_temperature
    self.latency_alpha = latency_alpha
    self.latency_beta = latency_beta
    self.exponential_decay = lambda step: exponential_decay(
        initial_temperature, temperature_decay_rate, temperature_decay_steps, step)

    fbnet.build(input_shape)
    self.fbnet = fbnet

    # Split trainable variables into architecture parameters (thetas) and network weights
    self.weights = []
    self.thetas = []
    for trainable_weight in fbnet.trainable_weights:
        if 'theta' in trainable_weight.name:
            self.thetas.append(trainable_weight)
        else:
            self.weights.append(trainable_weight)

    self.weight_opt = SGD(learning_rate=weight_lr, momentum=weight_momentum, decay=weight_decay)
    self.theta_opt = Adam(learning_rate=theta_lr, beta_1=theta_beta1,
                          beta_2=theta_beta2, decay=theta_decay)
    self.loss_fn = SparseCategoricalCrossentropy(from_logits=True)
    self.accuracy_metric = SparseCategoricalAccuracy()
    self.loss_metric = Mean()
class Leaner:

    def __init__(self, config: MuZeroConfig, storage: SharedStorage, replay_buffer: ReplayBuffer):
        self.config = config
        self.storage = storage
        self.replay_buffer = replay_buffer
        self.summary = create_summary(name="leaner")
        self.metrics_loss = Mean('leaner-loss', dtype=tf.float32)
        self.network = Network(self.config)
        self.lr_schedule = ExponentialDecay(
            initial_learning_rate=self.config.lr_init,
            decay_steps=self.config.lr_decay_steps,
            decay_rate=self.config.lr_decay_rate)
        self.optimizer = Adam(learning_rate=self.lr_schedule)

    def start(self):
        while self.network.training_steps() < self.config.training_steps:
            if ray.get(self.replay_buffer.size.remote()) > 0:
                self.train()
                if self.network.training_steps() % self.config.checkpoint_interval == 0:
                    weights = self.network.get_weights()
                    self.storage.update_network.remote(weights)
                if self.network.training_steps() % self.config.save_interval == 0:
                    self.network.save()
        print("Finished")

    def train(self):
        batch = ray.get(self.replay_buffer.sample_batch.remote())
        with tf.GradientTape() as tape:
            loss = self.network.loss_function(batch)
        grads = tape.gradient(loss, self.network.get_variables())
        self.optimizer.apply_gradients(zip(grads, self.network.get_variables()))
        self.metrics_loss(loss)
        with self.summary.as_default():
            tf.summary.scalar('loss', self.metrics_loss.result(), self.network.training_steps())
            self.metrics_loss.reset_states()
        self.network.update_training_steps()
def train(self, feature_value, embedding_index, label, optimizer='adam',
          learning_rate=1e-4, loss='sigmoid', epochs=50, batch=32, shuffle=10000):
    # Create the loss, optimizer and running-mean metric once, not per batch,
    # so the optimizer state is preserved and the reported loss is a true epoch average.
    self.loss_obj = get_loss(loss)
    self.optimizer = get_optimizer(optimizer, learning_rate=learning_rate)
    mean_loss = Mean(name='train_loss')
    for epoch in range(epochs):
        train_set = tensorflow.data.Dataset.from_tensor_slices(
            (feature_value, embedding_index, label)).shuffle(shuffle).batch(batch, drop_remainder=True)
        for batch_set in train_set:
            with GradientTape() as tape:
                prediction = self.call(feature_value=batch_set[0],
                                       embedding_index=batch_set[1],
                                       training=True)
                batch_loss = self.loss_obj(batch_set[2], prediction)
            gradients = tape.gradient(batch_loss, self.trainable_variables)
            self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
            mean_loss(batch_loss)
        print('epoch: {} ==> loss: {}'.format(epoch + 1, mean_loss.result()))
        mean_loss.reset_states()
def get_classification_metrics():
    train_loss = Mean()
    train_acc = SparseCategoricalAccuracy()
    valid_loss = Mean()
    valid_acc = SparseCategoricalAccuracy()
    test_loss = Mean()
    test_acc = SparseCategoricalAccuracy()

    metric_objects = dict()
    metric_objects['train_loss'] = train_loss
    metric_objects['train_acc'] = train_acc
    metric_objects['valid_loss'] = valid_loss
    metric_objects['valid_acc'] = valid_acc
    metric_objects['test_loss'] = test_loss
    metric_objects['test_acc'] = test_acc
    return metric_objects
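# Hypothetical usage sketch (not part of the original snippets): how the metric
# dictionary returned by get_classification_metrics() is typically consumed in a
# custom training step. `model`, `loss_fn` and `optimizer` are assumed to exist.
metrics = get_classification_metrics()

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss = loss_fn(y, logits)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # Mean() accumulates a running average of the per-batch loss;
    # SparseCategoricalAccuracy() accumulates the fraction of correct predictions.
    metrics['train_loss'](loss)
    metrics['train_acc'](y, logits)

# At the end of each epoch the accumulated values are read and then cleared:
#   print(metrics['train_loss'].result(), metrics['train_acc'].result())
#   for m in metrics.values():
#       m.reset_states()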
def __init__(self, encoder, predictor, feature_dim, loss_type, **kwargs):
    super(VFIB, self).__init__(**kwargs)
    self.encoder = encoder
    self.classifier = predictor
    self.loss_type = loss_type
    self.total_loss_tracker = Mean(name="total_loss")
    self.prediction_loss_tracker = Mean(name="prediction_loss")
    self.kl_loss_tracker = Mean(name="kl_loss")
    self.mmd_loss_tracker = Mean(name="mmd_loss")
def train(self, train_dataset, valid_dataset, steps, evaluate_every=1000, save_best_only=False):
    loss_mean = Mean()
    ckpt_mgr = self.checkpoint_manager
    ckpt = self.checkpoint
    self.now = time.perf_counter()
    # for each low-resolution / high-resolution image pair in the dataset
    for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
        t_start = time.time()
        ckpt.step.assign_add(1)
        step = ckpt.step.numpy()
        loss = self.train_step(lr, hr)
        loss_mean(loss)
        t_end = time.time()
        print("epoch:%3d step:%2d loss:%.5f time:%.3f" % (step / 50, step % 50, loss, t_end - t_start))
        # evaluate
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            # Compute PSNR on validation dataset
            psnr_value = self.evaluate(valid_dataset)
            duration = time.perf_counter() - self.now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            if save_best_only and psnr_value <= ckpt.psnr:
                # no PSNR improvement: skip saving checkpoint
                self.now = time.perf_counter()
                continue
            ckpt.psnr = psnr_value
            ckpt_mgr.save()
            print("checkpoint saved!")
            self.now = time.perf_counter()
def train(
    model: CAE,
    dataset,
    output_path: str,
    epochs: Optional[int],
    image_width: int,
    image_height: int,
    log_freq: int,
    save_freq: int,
) -> None:

    @tf.function
    def train_step(image):
        with tf.GradientTape() as tape:
            pred_image = model(image)
            model_trainable_variables = model.trainable_variables
            loss = MSE(image, pred_image)
        gradients = tape.gradient(loss, model_trainable_variables)
        optimizer.apply_gradients(zip(gradients, model_trainable_variables))
        train_loss(loss)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001)
    ckpt = tf.train.Checkpoint(optimizer=optimizer, transformer=model)
    manager = tf.train.CheckpointManager(ckpt, output_path, max_to_keep=1)
    train_loss = Mean(name='train_loss')

    epochs = epochs or len(dataset)
    section_size = 128
    for step, train_image in enumerate(dataset):
        train_image = train_image.numpy()
        # train on non-overlapping sections of the image
        for c in range(image_height // section_size):
            for j in range(image_width // section_size):
                cc = section_size * c
                jj = section_size * j
                train_image_batch = train_image[:, cc:cc + section_size, jj:jj + section_size, :]
                train_image_tensor = tf.convert_to_tensor(train_image_batch)
                train_step(train_image_tensor)
        if step % log_freq == 0:
            print(f'Step {step}/{epochs}, Loss: {train_loss.result()}')
        if step % save_freq == 0 or step == epochs - 1:
            print(f'Saved checkpoint: {manager.save()}')
            train_loss.reset_states()
        if epochs and step == epochs:
            break
def __init__(self, config: MuZeroConfig, storage: SharedStorage,
             replay_buffer: ReplayBuffer, temperature: float = 1.0):
    self.config = config
    self.network = Network(self.config)
    self.storage = storage
    self.replay_buffer = replay_buffer
    self.temperature = temperature
    self.name = f"games-{temperature}"
    self.summary = create_summary(name=self.name)
    self.games_played = 0
    self.metrics_games = Sum(self.name, dtype=tf.int32)
    self.metrics_temperature = Sum(self.name, dtype=tf.float32)
    self.metrics_rewards = Mean(self.name, dtype=tf.float32)
    self.started = False
def train(self, train_dataset, valid_dataset, steps, evaluate_every=1000, save_best_only=False):
    loss_mean = Mean()
    ckpt_mgr = self.checkpoint_manager
    ckpt = self.checkpoint
    vis_list = []
    for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
        ckpt.step.assign_add(1)
        step = ckpt.step.numpy()
        loss = self.train_step(lr, hr)
        loss_mean(loss)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            # Compute PSNR on validation dataset
            psnr_value = self.evaluate(valid_dataset)
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f}')
            vis_list.append((step, loss_value, psnr_value))
            if save_best_only and psnr_value <= ckpt.psnr:
                # skip saving checkpoint, no PSNR improvement
                continue
            ckpt.psnr = psnr_value
            ckpt_mgr.save()
    # saving progress data to make graphs
    csv = open('./visLoss.csv', 'w')
    csv.write('step, loss, psnr\n')
    for vals in vis_list:
        csv.write('{},{},{}\n'.format(vals[0], vals[1], vals[2]))
    csv.close()
def get_classification_metrics(losses=None, accuracy=None):
    train_loss = Mean()
    train_acc = SparseCategoricalAccuracy()
    validation_loss = Mean()
    validation_acc = SparseCategoricalAccuracy()
    test_loss = Mean()
    test_acc = SparseCategoricalAccuracy()

    metrics_objects = dict()
    metrics_objects['train_loss'] = train_loss
    metrics_objects['train_accuracy'] = train_acc
    metrics_objects['validation_loss'] = validation_loss
    metrics_objects['validation_accuracy'] = validation_acc
    metrics_objects['test_loss'] = test_loss
    metrics_objects['test_accuracy'] = test_acc
    return metrics_objects
def train(self, train_dataset, valid_dataset, steps, evaluate_every=1000, save_best_only=False):
    loss_mean = Mean()
    ckpt_mgr = self.checkpoint_manager
    ckpt = self.checkpoint
    self.now = time.perf_counter()
    for lr, hr in train_dataset.take(steps - ckpt.step.numpy()):
        # print('check1..', steps, ckpt.step.numpy())
        ckpt.step.assign_add(1)
        step = ckpt.step.numpy()
        loss = self.train_step(lr, hr)
        loss_mean(loss)
        if step % evaluate_every == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            # Compute PSNR on validation dataset
            psnr_value = self.evaluate(valid_dataset)
            duration = time.perf_counter() - self.now
            print(f'{step}/{steps}: loss = {loss_value.numpy():.3f}, PSNR = {psnr_value.numpy():.3f} ({duration:.2f}s)')
            self.resolve_and_plot('demo/img_0', step)
            if save_best_only and psnr_value <= ckpt.psnr:
                # skip saving checkpoint, no PSNR improvement
                self.now = time.perf_counter()
                continue
            ckpt.psnr = psnr_value
            ckpt_mgr.save()
            self.now = time.perf_counter()
class MeanBasedMetric(Metric):

    def __init__(self, name, dtype):
        super().__init__(name, dtype=dtype)
        self._mean = Mean(dtype=dtype)

    @abstractmethod
    def _objective_function(self, y_true, y_pred):
        pass

    def update_state(self, y_true, y_pred, sample_weight=None):
        values = self._objective_function(y_true, y_pred)
        self._mean.update_state(values=values, sample_weight=sample_weight)

    def result(self):
        return self._mean.result()

    def reset_states(self):
        self._mean.reset_states()
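# Hypothetical example (not from the original source): a concrete subclass of
# MeanBasedMetric. The PSNRMetric name and max_val argument are assumptions; the
# point is that only _objective_function needs to be overridden, while the wrapped
# Mean() handles accumulation via update_state / result / reset_states.
class PSNRMetric(MeanBasedMetric):

    def __init__(self, max_val=1.0, name='psnr', dtype=tf.float32):
        super().__init__(name, dtype)
        self._max_val = max_val

    def _objective_function(self, y_true, y_pred):
        # Per-image PSNR; the base class averages it over all batches seen so far.
        return tf.image.psnr(y_true, y_pred, max_val=self._max_val)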
def train_generator(self, train_dataset, valid_dataset, epochs=20000, valid_lr=None, valid_hr=None):
    evaluate_size = epochs / 10
    loss_mean = Mean()
    start_time = time.time()
    epoch = 0
    for lr, hr in train_dataset.take(epochs):
        epoch += 1
        step = tf.convert_to_tensor(epoch, dtype=tf.int64)
        generator_loss = self.train_generator_step(lr, hr)
        loss_mean(generator_loss)
        if epoch % 50 == 0:
            loss_value = loss_mean.result()
            loss_mean.reset_states()
            psnr_value = self.evaluate(valid_dataset.take(1))
            print(f'Time for epoch {epoch}/{epochs} is {(time.time() - start_time):.4f} sec, '
                  f'gan loss = {loss_value:.4f}, psnr = {psnr_value:.4f}')
            start_time = time.time()
            if self.summary_writer is not None:
                with self.summary_writer.as_default():
                    tf.summary.scalar('generator_loss', loss_value, step=epoch)
                    tf.summary.scalar('psnr', psnr_value, step=epoch)
        if epoch % evaluate_size == 0:
            self.util.save_checkpoint(self.checkpoint, epoch)
        if epoch % 5000 == 0:
            self.generate_and_save_images(step, valid_lr, valid_hr)