def main(config):
    experiment_name, current_time = setup_experiment(config.title, config)

    # normalization (creating t1_landmarks.npy file)
    create_normalization_file(
        use_controls=config.use_controls,
        use_nofcd=config.use_ae,
        mods=config.nb_of_modalities,
    )
    print('Normalization is finished')

    # patch extraction
    get_patch_list(use_controls=config.use_controls,
                   use_fcd=config.use_ae,
                   use_coronal=config.use_coronal,
                   use_sagital=config.use_sagital,
                   augment=config.augment,
                   h=config.height,
                   w=config.width,
                   hard_labeling=config.hard_labeling,
                   mods=config.nb_of_modalities,
                   batch_size=config.batch_size)
    print('Patch extraction is finished')

    # cnn model
    top_k_scores = train_model(mods=config.nb_of_modalities,
                               use_ae=config.use_ae,
                               h=config.height,
                               w=config.width,
                               use_coronal=config.use_coronal,
                               use_sagital=config.use_sagital,
                               use_controls=config.use_controls,
                               latent_dim=config.latent_size,
                               batch_size=config.batch_size,
                               lr=config.lr,
                               weight_decay=config.weight_decay,
                               weight_of_class=config.weight_of_class,
                               n_epochs=config.nb_epochs,
                               n_epochs_ae=config.nb_epochs_ae,
                               p=config.dropout_rate,
                               save_masks=config.save_masks,
                               parallel=config.parallel,
                               experiment_name=experiment_name,
                               temporal_division=config.temporal_division,
                               seed=config.seed)
    print(top_k_scores)
    print('LOO mean top-k score:', top_k_scores.mean())

    # logging
    log_experiment(config, current_time, (top_k_scores > 0).mean())

def run(dispatch_type):
    '''Test all dispatchers whose type is dispatch_type.'''
    assert dispatch_type in ['Tuner', 'Assessor'], \
        'Unsupported dispatcher type: %s' % dispatch_type
    dispatcher_list = TUNER_LIST if dispatch_type == 'Tuner' else ASSESSOR_LIST
    for dispatcher_name in dispatcher_list:
        try:
            # Sleep here to make sure the previously stopped experiment has
            # enough time to exit, to avoid port conflicts
            time.sleep(6)
            test_builtin_dispatcher(dispatch_type, dispatcher_name)
            print(GREEN + 'Test %s %s: TEST PASS' % (dispatcher_name, dispatch_type) + CLEAR)
        except Exception as error:
            print(RED + 'Test %s %s: TEST FAIL' % (dispatcher_name, dispatch_type) + CLEAR)
            print('%r' % error)
            traceback.print_exc()
            raise error
        finally:
            subprocess.run(['nnictl', 'stop'])


if __name__ == '__main__':
    installed = (sys.argv[-1] != '--preinstall')
    setup_experiment(installed)
    run('Tuner')
    run('Assessor')

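# TUNER_LIST, ASSESSOR_LIST, and the color constants used above are assumed to
# be module-level definitions; a plausible sketch (the concrete lists would
# track NNI's builtin dispatchers, so the entries below are an assumed subset):
GREEN = '\33[32m'
RED = '\33[31m'
CLEAR = '\33[0m'
TUNER_LIST = ['TPE', 'Random', 'Anneal', 'Evolution']  # assumed subset
ASSESSOR_LIST = ['Medianstop']  # assumed subset
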
# test cmd `nnictl stop --port`
proc = subprocess.run(['nnictl', 'stop', '--port', '8990'])
assert proc.returncode == 0, '`nnictl stop --port 8990` failed with code %d' % proc.returncode
snooze()
assert not detect_port(8990), '`nnictl stop --port 8990` failed to stop the experiment'

# test cmd `nnictl stop --all`
proc = subprocess.run(['nnictl', 'stop', '--all'])
assert proc.returncode == 0, '`nnictl stop --all` failed with code %d' % proc.returncode
snooze()
assert not detect_port(8888) and not detect_port(8989), '`nnictl stop --all` failed to stop experiments'


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str, required=True)
    parser.add_argument("--preinstall", action='store_true')
    args = parser.parse_args()
    setup_experiment(not args.preinstall)
    try:
        naive_test(args)
        stop_experiment_test(args)
        # TODO: check the output of rest server
        print(GREEN + 'PASS' + CLEAR)
    except Exception as error:
        print(RED + 'FAIL' + CLEAR)
        print('%r' % error)
        traceback.print_exc()
        sys.exit(1)

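# snooze() and detect_port() above are assumed helpers: one sleeps long enough
# for a stopped experiment to release its port, the other probes whether
# anything still listens on a local port. A minimal sketch under those
# assumptions:
import socket
import time


def snooze(seconds=6):
    """Give stopped experiments time to shut down and release their ports."""
    time.sleep(seconds)


def detect_port(port):
    """Return True if localhost:port still accepts TCP connections."""
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.settimeout(1)
        return sock.connect_ex(('127.0.0.1', port)) == 0
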
def main(_):
    # Make sure we have a valid config that inherits all the keys defined in
    # the base config.
    validate_config(FLAGS.config, mode="pretrain")
    config = FLAGS.config
    exp_dir = osp.join(config.root_dir, FLAGS.experiment_name)
    setup_experiment(exp_dir, config, FLAGS.resume)

    # No need to do any pretraining if we're loading the raw pretrained
    # ImageNet baseline.
    if FLAGS.raw_imagenet:
        return

    # Setup compute device.
    if torch.cuda.is_available():
        device = torch.device(FLAGS.device)
    else:
        logging.info("No GPU device found. Falling back to CPU.")
        device = torch.device("cpu")
    logging.info("Using device: %s", device)

    # Set RNG seeds.
    if config.seed is not None:
        logging.info("Pretraining experiment seed: %d", config.seed)
        experiment.seed_rngs(config.seed)
        experiment.set_cudnn(config.cudnn_deterministic, config.cudnn_benchmark)
    else:
        logging.info("No RNG seed has been set for this pretraining experiment.")

    logger = Logger(osp.join(exp_dir, "tb"), FLAGS.resume)

    # Load factories.
    (
        model,
        optimizer,
        pretrain_loaders,
        downstream_loaders,
        trainer,
        eval_manager,
    ) = common.get_factories(config, device)

    # Create checkpoint manager.
    checkpoint_dir = osp.join(exp_dir, "checkpoints")
    checkpoint_manager = CheckpointManager(
        checkpoint_dir,
        model=model,
        optimizer=optimizer,
    )

    global_step = checkpoint_manager.restore_or_initialize()
    total_batches = max(1, len(pretrain_loaders["train"]))
    epoch = int(global_step / total_batches)
    complete = False
    stopwatch = Stopwatch()
    try:
        while not complete:
            for batch in pretrain_loaders["train"]:
                train_loss = trainer.train_one_iter(batch)

                if not global_step % config.logging_frequency:
                    for k, v in train_loss.items():
                        logger.log_scalar(v, global_step, k, "pretrain")
                    logger.flush()

                if not global_step % config.eval.eval_frequency:
                    # Evaluate the model on the pretraining validation dataset.
                    valid_loss = trainer.eval_num_iters(
                        pretrain_loaders["valid"],
                        config.eval.val_iters,
                    )
                    for k, v in valid_loss.items():
                        logger.log_scalar(v, global_step, k, "pretrain")

                    # Evaluate the model on the downstream datasets.
                    for split, downstream_loader in downstream_loaders.items():
                        eval_to_metric = eval_manager.evaluate(
                            model,
                            downstream_loader,
                            device,
                            config.eval.val_iters,
                        )
                        for eval_name, eval_out in eval_to_metric.items():
                            eval_out.log(
                                logger,
                                global_step,
                                eval_name,
                                f"downstream/{split}",
                            )

                # Save model checkpoint.
                if not global_step % config.checkpointing_frequency:
                    checkpoint_manager.save(global_step)

                # Exit if complete.
                global_step += 1
                if global_step > config.optim.train_max_iters:
                    complete = True
                    break

                time_per_iter = stopwatch.elapsed()
                logging.info(
                    "Iter[{}/{}] (Epoch {}), {:.6f}s/iter, Loss: {:.3f}".format(
                        global_step,
                        config.optim.train_max_iters,
                        epoch,
                        time_per_iter,
                        train_loss["train/total_loss"].item(),
                    ))
                stopwatch.reset()
            epoch += 1
    except KeyboardInterrupt:
        logging.info("Caught keyboard interrupt. Saving model before quitting.")
    finally:
        checkpoint_manager.save(global_step)
        logger.close()

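# Stopwatch above only needs elapsed() and reset(); a minimal sketch matching
# that usage (the real class may do more, e.g. pause/resume):
import time


class Stopwatch:
    """Measures wall-clock seconds between reset() calls."""

    def __init__(self):
        self.reset()

    def elapsed(self):
        # Seconds since construction or the most recent reset().
        return time.time() - self._start

    def reset(self):
        self._start = time.time()
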
def main(_):
    # Make sure we have a valid config that inherits all the keys defined in
    # the base config.
    validate_config(FLAGS.config, mode="rl")
    config = FLAGS.config
    exp_dir = osp.join(
        config.save_dir,
        FLAGS.experiment_name,
        str(FLAGS.seed),
    )
    utils.setup_experiment(exp_dir, config, FLAGS.resume)

    # Setup compute device.
    if torch.cuda.is_available():
        device = torch.device(FLAGS.device)
    else:
        logging.info("No GPU device found. Falling back to CPU.")
        device = torch.device("cpu")
    logging.info("Using device: %s", device)

    # Set RNG seeds.
    if FLAGS.seed is not None:
        logging.info("RL experiment seed: %d", FLAGS.seed)
        experiment.seed_rngs(FLAGS.seed)
        experiment.set_cudnn(config.cudnn_deterministic, config.cudnn_benchmark)
    else:
        logging.info("No RNG seed has been set for this RL experiment.")

    # Load env.
    env = utils.make_env(
        FLAGS.env_name,
        FLAGS.seed,
        action_repeat=config.action_repeat,
        frame_stack=config.frame_stack,
    )
    eval_env = utils.make_env(
        FLAGS.env_name,
        FLAGS.seed + 42,
        action_repeat=config.action_repeat,
        frame_stack=config.frame_stack,
        save_dir=osp.join(exp_dir, "video", "eval"),
    )

    # Dynamically set observation and action space values.
    config.sac.obs_dim = env.observation_space.shape[0]
    config.sac.action_dim = env.action_space.shape[0]
    config.sac.action_range = [
        float(env.action_space.low.min()),
        float(env.action_space.high.max()),
    ]

    # Resave the config since the dynamic values have been updated at this
    # point, and make it immutable for safety :)
    utils.dump_config(exp_dir, config)
    config = config_dict.FrozenConfigDict(config)

    policy = agent.SAC(device, config.sac)
    buffer = utils.make_buffer(env, device, config)

    # Create checkpoint manager.
    checkpoint_dir = osp.join(exp_dir, "checkpoints")
    checkpoint_manager = CheckpointManager(
        checkpoint_dir,
        policy=policy,
        **policy.optim_dict(),
    )

    logger = Logger(osp.join(exp_dir, "tb"), FLAGS.resume)

    try:
        start = checkpoint_manager.restore_or_initialize()
        observation, done = env.reset(), False
        for i in tqdm(range(start, config.num_train_steps), initial=start):
            if i < config.num_seed_steps:
                # Warm up the replay buffer with random actions.
                action = env.action_space.sample()
            else:
                policy.eval()
                action = policy.act(observation, sample=True)
            next_observation, reward, done, info = env.step(action)

            # Only true terminations get a mask of 0.0; time-limit truncations
            # keep 1.0 so the bootstrap term survives.
            if not done or "TimeLimit.truncated" in info:
                mask = 1.0
            else:
                mask = 0.0

            if not config.reward_wrapper.pretrained_path:
                buffer.insert(observation, action, reward, next_observation, mask)
            else:
                buffer.insert(
                    observation,
                    action,
                    reward,
                    next_observation,
                    mask,
                    env.render(mode="rgb_array"),
                )
            observation = next_observation

            if done:
                observation, done = env.reset(), False
                for k, v in info["episode"].items():
                    logger.log_scalar(v, info["total"]["timesteps"], k, "training")

            if i >= config.num_seed_steps:
                policy.train()
                train_info = policy.update(buffer, i)

                if (i + 1) % config.log_frequency == 0:
                    for k, v in train_info.items():
                        logger.log_scalar(v, info["total"]["timesteps"], k, "training")
                    logger.flush()

            if (i + 1) % config.eval_frequency == 0:
                eval_stats = evaluate(policy, eval_env, config.num_eval_episodes)
                for k, v in eval_stats.items():
                    logger.log_scalar(
                        v,
                        info["total"]["timesteps"],
                        f"average_{k}s",
                        "evaluation",
                    )
                logger.flush()

            if (i + 1) % config.checkpoint_frequency == 0:
                checkpoint_manager.save(i)
    except KeyboardInterrupt:
        print("Caught keyboard interrupt. Saving before quitting.")
    finally:
        checkpoint_manager.save(i)  # pylint: disable=undefined-loop-variable
        logger.close()

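# evaluate() above is assumed to roll out the deterministic policy for a fixed
# number of episodes and average per-episode statistics; a minimal sketch
# under that assumption (the keys it returns feed the f"average_{k}s" tags):
def evaluate(policy, env, num_episodes):
    policy.eval()
    total_return = 0.0
    for _ in range(num_episodes):
        observation, done = env.reset(), False
        while not done:
            action = policy.act(observation, sample=False)
            observation, reward, done, _ = env.step(action)
            total_return += reward
    return {"episode_reward": total_return / num_episodes}
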
def main(args):
    # gpu or cpu
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    args = utils.setup_experiment(args)
    utils.init_logging(args)

    # Loading models
    MODEL_PATH_LOAD = "../lidar_experiments/2d/lidar_unet2d/lidar-unet2d-Nov-08-16:29:49/checkpoints/checkpoint_best.pt"
    train_new_model = True

    # Build data loaders, a model and an optimizer
    if train_new_model:
        model = models.build_model(args).to(device)
    else:
        model = models.build_model(args)
        model.load_state_dict(torch.load(MODEL_PATH_LOAD)['model'][0])
        model.to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[5, 15, 30, 50, 100, 250], gamma=0.5)
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters"
    )

    if args.resume_training:
        state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
        global_step = state_dict['last_step']
        start_epoch = int(state_dict['last_step'] /
                          (403200 / state_dict['args'].batch_size)) + 1
    else:
        global_step = -1
        start_epoch = 0

    # Load the .pts files; each loads as a list of numpy arrays
    scan_line_tensor = torch.load(args.data_path + 'scan_line_tensor.pts')
    train_idx_list = torch.load(args.data_path + 'train_idx_list.pts')
    valid_idx_list = torch.load(args.data_path + 'valid_idx_list.pts')
    sc = torch.load(args.data_path + 'sc.pts')

    # Dataloaders
    train_dataset = LidarLstmDataset(scan_line_tensor, train_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)
    valid_dataset = LidarLstmDataset(scan_line_tensor, valid_idx_list,
                                     args.seq_len, args.mask_pts_per_seq)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=4, shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=args.batch_size,
                                               num_workers=4, shuffle=True)

    # Track moving average of loss values
    train_meters = {name: utils.RunningAverageMeter(0.98)
                    for name in ["train_loss"]}
    valid_meters = {name: utils.AverageMeter() for name in ["valid_loss"]}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    ##################################################
    # TRAINING
    for epoch in range(start_epoch, args.num_epochs):
        if args.resume_training:
            if epoch % 1 == 0:
                optimizer.param_groups[0]["lr"] /= 2
                print('learning rate reduced by factor of 2')

        train_bar = utils.ProgressBar(train_loader, epoch)
        for meter in train_meters.values():
            meter.reset()

        # epoch_loss_sum = 0
        # the dataloader returns a [clean, mask] pair
        for batch_id, (clean, mask) in enumerate(train_bar):
            model.train()
            global_step += 1
            inputs = clean.to(device)
            mask_inputs = mask.to(device)

            # only use the masked part of the outputs
            raw_outputs = model(inputs, mask_inputs)
            outputs = (1 - mask_inputs[:, :3, :, :]) * raw_outputs + \
                mask_inputs[:, :3, :, :] * inputs[:, :3, :, :]

            if args.wtd_loss:
                loss = weighted_MSELoss(outputs, inputs[:, :3, :, :], sc) / (
                    inputs.size(0) * (args.mask_pts_per_seq**2))  # Regularization?
            else:
                # normalized by the number of masked points
                loss = F.mse_loss(outputs, inputs[:, :3, :, :], reduction="sum") / \
                    (inputs.size(0) * (args.mask_pts_per_seq**2))

            model.zero_grad()
            loss.backward()
            optimizer.step()
            # epoch_loss_sum += loss * inputs.size(0)
            train_meters["train_loss"].update(loss)
            train_bar.log(dict(**train_meters,
                               lr=optimizer.param_groups[0]["lr"]),
                          verbose=True)

            if writer is not None and global_step % args.log_interval == 0:
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], global_step)
                writer.add_scalar("loss/train", loss.item(), global_step)
                gradients = torch.cat([
                    p.grad.view(-1) for p in model.parameters()
                    if p.grad is not None
                ], dim=0)
                writer.add_histogram("gradients", gradients, global_step)
                sys.stdout.flush()

        # epoch_loss = epoch_loss_sum / len(train_loader.dataset)
        if epoch % args.valid_interval == 0:
            model.eval()
            for meter in valid_meters.values():
                meter.reset()
            valid_bar = utils.ProgressBar(valid_loader)
            val_loss = 0
            for sample_id, (clean, mask) in enumerate(valid_bar):
                with torch.no_grad():
                    inputs = clean.to(device)
                    mask_inputs = mask.to(device)

                    # only use the masked part of the outputs
                    raw_output = model(inputs, mask_inputs)
                    output = (1 - mask_inputs[:, :3, :, :]) * raw_output + \
                        mask_inputs[:, :3, :, :] * inputs[:, :3, :, :]

                    # TODO: only run the loss on the masked part of the output
                    if args.wtd_loss:
                        val_loss = weighted_MSELoss(output, inputs[:, :3, :, :], sc) / (
                            inputs.size(0) * (args.mask_pts_per_seq**2))
                    else:
                        # normalized by the number of masked points
                        val_loss = F.mse_loss(output, inputs[:, :3, :, :], reduction="sum") / (
                            inputs.size(0) * (args.mask_pts_per_seq**2))
                    valid_meters["valid_loss"].update(val_loss.item())

            if writer is not None:
                writer.add_scalar("loss/valid", valid_meters['valid_loss'].avg, global_step)
                sys.stdout.flush()
            logging.info(
                train_bar.print(dict(**train_meters, **valid_meters,
                                     lr=optimizer.param_groups[0]["lr"])))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=valid_meters["valid_loss"].avg,
                                  mode="min")
        scheduler.step()

    logging.info(
        f"Done training! Best Loss {utils.save_checkpoint.best_score:.3f} obtained after step {utils.save_checkpoint.best_step}."
    )

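# weighted_MSELoss is not defined in this file; a plausible sketch, assuming
# `sc` (loaded from sc.pts above) is a tensor of per-channel scale factors
# used to re-weight the squared error before summing:
def weighted_MSELoss(outputs, targets, sc):
    # Broadcast the assumed channel weights over (batch, channels, H, W).
    weights = sc.view(1, -1, 1, 1).to(outputs.device)
    return (weights * (outputs - targets) ** 2).sum()
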
def main(unused_args):
    assert len(unused_args) == 1, unused_args
    setup_experiment(logging, FLAGS, "critic_model")

    if FLAGS.validation:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=0)
        val_ds = mnist_ds.test
    else:
        mnist_ds = mnist.read_data_sets(FLAGS.data_dir,
                                        dtype=tf.float32,
                                        reshape=False,
                                        validation_size=FLAGS.validation_size)
        val_ds = mnist_ds.validation
    train_ds = mnist_ds.train
    test_ds = mnist_ds.test
    num_classes = FLAGS.num_classes

    img_shape = [None, 1, 28, 28]
    X = tf.placeholder(tf.float32, shape=img_shape, name='X')
    # placeholder to avoid recomputation of adversarial images for critic
    X_hat_h = tf.placeholder(tf.float32, shape=img_shape, name='X_hat')
    y = tf.placeholder(tf.int32, shape=[None], name='y')
    y_onehot = tf.one_hot(y, num_classes)
    reduce_ind = list(range(1, X.get_shape().ndims))
    # test/validation inputs
    X_v = tf.placeholder(tf.float32, shape=img_shape, name='X_v')
    y_v = tf.placeholder(tf.int32, shape=[None], name='y_v')
    y_v_onehot = tf.one_hot(y_v, num_classes)

    # classifier model
    model = create_model(FLAGS, name=FLAGS.model_name)

    def test_model(x, **kwargs):
        return model(x, train=False, **kwargs)

    # generator
    def generator(inputs, confidence, targets=None):
        return high_confidence_attack_unrolled(
            lambda x: model(x)['logits'],
            inputs,
            targets=targets,
            confidence=confidence,
            max_iter=FLAGS.attack_iter,
            over_shoot=FLAGS.attack_overshoot,
            attack_random=FLAGS.attack_random,
            attack_uniform=FLAGS.attack_uniform,
            attack_label_smoothing=FLAGS.attack_label_smoothing)

    def test_generator(inputs, confidence, targets=None):
        return high_confidence_attack(lambda x: test_model(x)['logits'],
                                      inputs,
                                      targets=targets,
                                      confidence=confidence,
                                      max_iter=FLAGS.df_iter,
                                      over_shoot=FLAGS.df_overshoot,
                                      random=FLAGS.attack_random,
                                      uniform=FLAGS.attack_uniform,
                                      clip_dist=FLAGS.df_clip)

    # discriminator
    critic = create_model(FLAGS, prefix='critic_', name='critic')

    # classifier outputs
    outs_x = model(X)
    outs_x_v = test_model(X_v)
    params = tf.trainable_variables()
    model_weights = [param for param in params if "weights" in param.name]
    vars = tf.model_variables()

    target_conf_v = [None]
    if FLAGS.attack_confidence == "same":
        # set the target confidence to the confidence of the original prediction
        target_confidence = outs_x['conf']
        target_conf_v[0] = target_confidence
    elif FLAGS.attack_confidence == "class_running_mean":
        # set the target confidence to the mean confidence of the specific
        # target, using a running mean estimate
        class_conf_mean = tf.Variable(np.ones(num_classes, dtype=np.float32))
        batch_conf_mean = tf.unsorted_segment_mean(outs_x['conf'],
                                                   outs_x['pred'], num_classes)
        # if the batch does not contain predictions for the specific target
        # (zeroes), replace zeroes with the stored class mean (previous batch)
        batch_conf_mean = tf.where(tf.not_equal(batch_conf_mean, 0),
                                   batch_conf_mean, class_conf_mean)
        # update class confidence mean
        class_conf_mean = assign_moving_average(class_conf_mean,
                                                batch_conf_mean, 0.5)
        # init class confidence during pre-training
        tf.add_to_collection("PREINIT_OPS", class_conf_mean)

        def target_confidence(targets_onehot):
            targets = tf.argmax(targets_onehot, axis=1)
            check_conf = tf.Assert(
                tf.reduce_all(tf.not_equal(class_conf_mean, 0)),
                [class_conf_mean])
            with tf.control_dependencies([check_conf]):
                t = tf.gather(class_conf_mean, targets)
            target_conf_v[0] = t
            return tf.stop_gradient(t)
    else:
        target_confidence = float(FLAGS.attack_confidence)
        target_conf_v[0] = target_confidence

    X_hat = generator(X, target_confidence)
    outs_x_hat = model(X_hat)
    # select examples for which the attack succeeded (changed the prediction)
    X_hat_filter = tf.not_equal(outs_x['pred'], outs_x_hat['pred'])
    X_hat_f = tf.boolean_mask(X_hat, X_hat_filter)
    X_f = tf.boolean_mask(X, X_hat_filter)

    outs_x_f = model(X_f)
    outs_x_hat_f = model(X_hat_f)

    X_hatd = tf.stop_gradient(X_hat)
    X_rec = generator(X_hatd, outs_x['conf'], outs_x['pred'])
    X_rec_f = tf.boolean_mask(X_rec, X_hat_filter)

    # validation/test adversarial examples
    X_v_hat = test_generator(X_v, FLAGS.val_attack_confidence)
    X_v_hatd = tf.stop_gradient(X_v_hat)
    X_v_rec = test_generator(X_v_hatd, outs_x_v['conf'],
                             targets=outs_x_v['pred'])
    X_v_hat_df = deepfool(lambda x: test_model(x)['logits'], X_v, y_v,
                          max_iter=FLAGS.df_iter, clip_dist=FLAGS.df_clip)
    X_v_hat_df_all = deepfool(lambda x: test_model(x)['logits'], X_v,
                              max_iter=FLAGS.df_iter, clip_dist=FLAGS.df_clip)

    y_hat = outs_x['pred']
    y_adv = outs_x_hat['pred']
    y_adv_f = outs_x_hat_f['pred']
    tf.summary.histogram('y_data', y, collections=["model_summaries"])
    tf.summary.histogram('y_hat', y_hat, collections=["model_summaries"])
    tf.summary.histogram('y_adv', y_adv, collections=["model_summaries"])

    # critic outputs
    critic_outs_x = critic(X)
    critic_outs_x_hat = critic(X_hat_f)
    critic_params = list(set(tf.trainable_variables()) - set(params))
    critic_vars = list(set(tf.trainable_variables()) - set(vars))

    # binary logits for a specific target
    logits_data = critic_outs_x['logits']
    logits_data_flt = tf.reshape(logits_data, (-1,))
    z_data = tf.gather(logits_data_flt,
                       tf.range(tf.shape(X)[0]) * num_classes + y)
    logits_adv = critic_outs_x_hat['logits']
    logits_adv_flt = tf.reshape(logits_adv, (-1,))
    z_adv = tf.gather(logits_adv_flt,
                      tf.range(tf.shape(X_hat_f)[0]) * num_classes + y_adv_f)

    # classifier/generator losses
    nll = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_onehot, outs_x['logits']))
    nll_v = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(y_v_onehot, outs_x_v['logits']))
    # gan losses
    gan = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_adv), z_adv)
    rec_l1 = tf.reduce_mean(
        tf.reduce_sum(tf.abs(X_f - X_rec_f), axis=reduce_ind))
    rec_l2 = tf.reduce_mean(
        tf.reduce_sum((X_f - X_rec_f)**2, axis=reduce_ind))
    weight_decay = slim.apply_regularization(slim.l2_regularizer(1.0),
                                             model_weights[:-1])
    pretrain_loss = nll + 5e-6 * weight_decay
    loss = nll + FLAGS.lmbd * gan
    if FLAGS.lmbd_rec_l1 > 0:
        loss += FLAGS.lmbd_rec_l1 * rec_l1
    if FLAGS.lmbd_rec_l2 > 0:
        loss += FLAGS.lmbd_rec_l2 * rec_l2
    if FLAGS.weight_decay > 0:
        loss += FLAGS.weight_decay * weight_decay

    # critic loss
    critic_gan_data = tf.losses.sigmoid_cross_entropy(tf.ones_like(z_data),
                                                      z_data)
    # use a placeholder for X_hat to avoid recomputation of adversarial noise
    y_adv_h = model(X_hat_h)['pred']
    logits_adv_h = critic(X_hat_h)['logits']
    logits_adv_flt_h = tf.reshape(logits_adv_h, (-1,))
    z_adv_h = tf.gather(logits_adv_flt_h,
                        tf.range(tf.shape(X_hat_h)[0]) * num_classes + y_adv_h)
    critic_gan_adv = tf.losses.sigmoid_cross_entropy(tf.zeros_like(z_adv_h),
                                                     z_adv_h)
    critic_gan = critic_gan_data + critic_gan_adv

    # Gulrajani discriminator regularizer (we do not interpolate)
    critic_grad_data = tf.gradients(z_data, X)[0]
    critic_grad_adv = tf.gradients(z_adv_h, X_hat_h)[0]
    critic_grad_penalty = norm_penalty(critic_grad_adv) + norm_penalty(critic_grad_data)
    critic_loss = critic_gan + FLAGS.lmbd_grad * critic_grad_penalty

    # classifier metrics
    err = 1 - slim.metrics.accuracy(outs_x['pred'], y)
    conf = tf.reduce_mean(outs_x['conf'])
    err_hat = 1 - slim.metrics.accuracy(test_model(X_hat)['pred'],
                                        outs_x['pred'])
    err_hat_f = 1 - slim.metrics.accuracy(test_model(X_hat_f)['pred'],
                                          outs_x_f['pred'])
    err_rec = 1 - slim.metrics.accuracy(test_model(X_rec)['pred'],
                                        outs_x['pred'])
    conf_hat = tf.reduce_mean(test_model(X_hat)['conf'])
    conf_hat_f = tf.reduce_mean(test_model(X_hat_f)['conf'])
    conf_rec = tf.reduce_mean(test_model(X_rec)['conf'])
    err_v = 1 - slim.metrics.accuracy(outs_x_v['pred'], y_v)
    conf_v_hat = tf.reduce_mean(test_model(X_v_hat)['conf'])
    l2_hat = tf.sqrt(tf.reduce_sum((X_f - X_hat_f)**2, axis=reduce_ind))
    tf.summary.histogram('l2_hat', l2_hat, collections=["model_summaries"])

    # critic metrics
    critic_err_data = 1 - binary_accuracy(
        z_data, tf.ones(tf.shape(z_data), tf.bool), 0.0)
    critic_err_adv = 1 - binary_accuracy(
        z_adv, tf.zeros(tf.shape(z_adv), tf.bool), 0.0)

    # validation metrics
    err_df = 1 - slim.metrics.accuracy(test_model(X_v_hat_df)['pred'], y_v)
    err_df_all = 1 - slim.metrics.accuracy(
        test_model(X_v_hat_df_all)['pred'], outs_x_v['pred'])
    l2_v_hat = tf.sqrt(tf.reduce_sum((X_v - X_v_hat)**2, axis=reduce_ind))
    l2_v_rec = tf.sqrt(tf.reduce_sum((X_v - X_v_rec)**2, axis=reduce_ind))
    l1_v_rec = tf.reduce_sum(tf.abs(X_v - X_v_rec), axis=reduce_ind)
    l2_df = tf.sqrt(tf.reduce_sum((X_v - X_v_hat_df)**2, axis=reduce_ind))
    l2_df_norm = l2_df / tf.sqrt(tf.reduce_sum(X_v**2, axis=reduce_ind))
    l2_df_all = tf.sqrt(
        tf.reduce_sum((X_v - X_v_hat_df_all)**2, axis=reduce_ind))
    l2_df_norm_all = l2_df_all / tf.sqrt(
        tf.reduce_sum(X_v**2, axis=reduce_ind))
    tf.summary.histogram('l2_df', l2_df, collections=["adv_summaries"])
    tf.summary.histogram('l2_df_norm', l2_df_norm, collections=["adv_summaries"])

    # metric dictionaries
    pretrain_model_metrics = OrderedDict([('nll', nll),
                                          ('weight_decay', weight_decay),
                                          ('err', err)])
    model_metrics = OrderedDict([('loss', loss), ('nll', nll),
                                 ('l2_hat', tf.reduce_mean(l2_hat)),
                                 ('gan', gan), ('rec_l1', rec_l1),
                                 ('rec_l2', rec_l2),
                                 ('weight_decay', weight_decay),
                                 ('err', err), ('conf', conf),
                                 ('err_hat', err_hat),
                                 ('err_hat_f', err_hat_f),
                                 ('conf_t', tf.reduce_mean(target_conf_v[0])),
                                 ('conf_hat', conf_hat),
                                 ('conf_hat_f', conf_hat_f),
                                 ('err_rec', err_rec),
                                 ('conf_rec', conf_rec)])
    critic_metrics = OrderedDict([('c_loss', critic_loss),
                                  ('c_gan', critic_gan),
                                  ('c_gan_data', critic_gan_data),
                                  ('c_gan_adv', critic_gan_adv),
                                  ('c_grad_norm', critic_grad_penalty),
                                  ('c_err_adv', critic_err_adv),
                                  ('c_err_data', critic_err_data)])
    val_metrics = OrderedDict([('nll', nll_v), ('err', err_v)])
    adv_metrics = OrderedDict([('l2_df', tf.reduce_mean(l2_df)),
                               ('l2_df_norm', tf.reduce_mean(l2_df_norm)),
                               ('l2_df_all', tf.reduce_mean(l2_df_all)),
                               ('l2_df_all_norm', tf.reduce_mean(l2_df_norm_all)),
                               ('l2_hat', tf.reduce_mean(l2_v_hat)),
                               ('conf_hat', conf_v_hat),
                               ('l1_rec', tf.reduce_mean(l1_v_rec)),
                               ('l2_rec', tf.reduce_mean(l2_v_rec)),
                               ('err_df', err_df),
                               ('err_df_all', err_df_all)])

    pretrain_metric_mean, pretrain_metric_upd = register_metrics(
        pretrain_model_metrics, collections="pretrain_model_summaries")
    metric_mean, metric_upd = register_metrics(model_metrics,
                                               collections="model_summaries")
    critic_metric_mean, critic_metric_upd = register_metrics(
        critic_metrics, collections="critic_summaries")
    val_metric_mean, val_metric_upd = register_metrics(
        val_metrics, prefix="val_", collections="val_summaries")
    adv_metric_mean, adv_metric_upd = register_metrics(
        adv_metrics, collections="adv_summaries")
    metrics_reset = tf.variables_initializer(tf.local_variables())

    # training ops
    lr = tf.Variable(FLAGS.lr, trainable=False)
    critic_lr = tf.Variable(FLAGS.critic_lr, trainable=False)
    tf.summary.scalar('lr', lr, collections=["model_summaries"])
    tf.summary.scalar('critic_lr', critic_lr, collections=["critic_summaries"])
    optimizer = tf.train.AdamOptimizer(learning_rate=lr, beta1=0.5)
    preinit_ops = tf.get_collection("PREINIT_OPS")
    with tf.control_dependencies(preinit_ops):
        pretrain_solver = optimizer.minimize(pretrain_loss, var_list=params)
    solver = optimizer.minimize(loss, var_list=params)
    critic_solver = (tf.train.AdamOptimizer(
        learning_rate=critic_lr, beta1=0.5).minimize(critic_loss,
                                                     var_list=critic_params))

    # train
    summary_images, summary_labels = select_balanced_subset(
        train_ds.images, train_ds.labels, num_classes, num_classes)
    summary_images = summary_images.transpose((0, 3, 1, 2))
    save_path = os.path.join(FLAGS.samples_dir, 'orig.png')
    save_images(summary_images, save_path)

    if FLAGS.gpu_memory < 1.0:
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_memory)
        config = tf.ConfigProto(gpu_options=gpu_options)
    else:
        config = None
    with tf.Session(config=config) as sess:
        try:
            # summaries
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
            summaries = tf.summary.merge_all("model_summaries")
            critic_summaries = tf.summary.merge_all("critic_summaries")
            val_summaries = tf.summary.merge_all("val_summaries")
            adv_summaries = tf.summary.merge_all("adv_summaries")

            # initialization
            tf.local_variables_initializer().run()
            tf.global_variables_initializer().run()

            # pretrain model
            if FLAGS.pretrain_niter > 0:
                logging.info("Model pretraining")
                for epoch in range(1, FLAGS.pretrain_niter + 1):
                    train_iterator = batch_iterator(train_ds.images,
                                                    train_ds.labels,
                                                    FLAGS.batch_size,
                                                    shuffle=True)
                    sess.run(metrics_reset)

                    start_time = time.time()
                    for ind, (images, labels) in enumerate(train_iterator):
                        sess.run([pretrain_solver, pretrain_metric_upd],
                                 feed_dict={X: images, y: labels})

                    str_bfr = six.StringIO()
                    str_bfr.write("Pretrain epoch [{}, {:.2f}s]:".format(
                        epoch, time.time() - start_time))
                    print_results_str(str_bfr, pretrain_model_metrics.keys(),
                                      sess.run(pretrain_metric_mean))
                    print_results_str(str_bfr, critic_metrics.keys(),
                                      sess.run(critic_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

            # training
            for epoch in range(1, FLAGS.niter + 1):
                train_iterator = batch_iterator(train_ds.images,
                                                train_ds.labels,
                                                FLAGS.batch_size,
                                                shuffle=True)
                sess.run(metrics_reset)

                start_time = time.time()
                for ind, (images, labels) in enumerate(train_iterator):
                    batch_index = (epoch - 1) * (train_ds.images.shape[0] //
                                                 FLAGS.batch_size) + ind
                    # train critic for several steps
                    X_hat_np = sess.run(X_hat, feed_dict={X: images})
                    for _ in range(FLAGS.critic_steps - 1):
                        sess.run([critic_solver],
                                 feed_dict={X: images, y: labels,
                                            X_hat_h: X_hat_np})
                    else:
                        # for/else: the else branch always runs after the loop
                        # (there is no break), so the final critic step also
                        # updates the metrics and records summaries
                        summary = sess.run([critic_solver, critic_metric_upd,
                                            critic_summaries],
                                           feed_dict={X: images, y: labels,
                                                      X_hat_h: X_hat_np})[-1]
                        summary_writer.add_summary(summary, batch_index)
                    # train model
                    summary = sess.run([solver, metric_upd, summaries],
                                       feed_dict={X: images, y: labels})[-1]
                    summary_writer.add_summary(summary, batch_index)

                str_bfr = six.StringIO()
                str_bfr.write("Train epoch [{}, {:.2f}s]:".format(
                    epoch, time.time() - start_time))
                print_results_str(str_bfr, model_metrics.keys(),
                                  sess.run(metric_mean))
                print_results_str(str_bfr, critic_metrics.keys(),
                                  sess.run(critic_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                val_iterator = batch_iterator(val_ds.images, val_ds.labels,
                                              100, shuffle=False)
                for images, labels in val_iterator:
                    summary = sess.run([val_metric_upd, val_summaries],
                                       feed_dict={X_v: images,
                                                  y_v: labels})[-1]
                    summary_writer.add_summary(summary, epoch)

                str_bfr = six.StringIO()
                str_bfr.write("Valid epoch [{}]:".format(epoch))
                print_results_str(str_bfr, val_metrics.keys(),
                                  sess.run(val_metric_mean))
                logging.info(str_bfr.getvalue()[:-1])

                # learning rate decay
                update_lr = lr_decay(lr, epoch)
                if update_lr is not None:
                    sess.run(update_lr)
                    logging.debug("learning rate was updated to: {:.10f}".format(lr.eval()))
                critic_update_lr = lr_decay(critic_lr, epoch, prefix='critic_')
                if critic_update_lr is not None:
                    sess.run(critic_update_lr)
                    logging.debug("critic learning rate was updated to: {:.10f}".format(critic_lr.eval()))

                if epoch % FLAGS.summary_frequency == 0:
                    samples_hat, samples_rec, samples_df, summary = sess.run(
                        [X_v_hat, X_v_rec, X_v_hat_df, adv_summaries,
                         adv_metric_upd],
                        feed_dict={X_v: summary_images,
                                   y_v: summary_labels})[:-1]
                    summary_writer.add_summary(summary, epoch)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_orig-%d.png' % epoch)
                    save_images(summary_images, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch-%d.png' % epoch)
                    save_images(samples_hat, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_rec-%d.png' % epoch)
                    save_images(samples_rec, save_path)
                    save_path = os.path.join(FLAGS.samples_dir,
                                             'epoch_df-%d.png' % epoch)
                    save_images(samples_df, save_path)

                    str_bfr = six.StringIO()
                    str_bfr.write("Summary epoch [{}]:".format(epoch))
                    print_results_str(str_bfr, adv_metrics.keys(),
                                      sess.run(adv_metric_mean))
                    logging.info(str_bfr.getvalue()[:-1])

                if (FLAGS.checkpoint_frequency != -1
                        and epoch % FLAGS.checkpoint_frequency == 0):
                    save_checkpoint(sess, vars, epoch=epoch)
                    save_checkpoint(sess, critic_vars, name="critic_model",
                                    epoch=epoch)
        except KeyboardInterrupt:
            logging.debug("Keyboard interrupt. Stopping training...")
        except NanError as e:
            logging.info(e)
        finally:
            sess.run(metrics_reset)
            save_checkpoint(sess, vars)
            save_checkpoint(sess, critic_vars, name="critic_model")

        # final accuracy
        test_iterator = batch_iterator(test_ds.images, test_ds.labels, 100,
                                       shuffle=False)
        for images, labels in test_iterator:
            sess.run([val_metric_upd], feed_dict={X_v: images, y_v: labels})

        str_bfr = six.StringIO()
        str_bfr.write("Final epoch [{}]:".format(epoch))
        for metric_name, metric_value in zip(val_metrics.keys(),
                                             sess.run(val_metric_mean)):
            str_bfr.write(" {}: {:.6f},".format(metric_name, metric_value))
        logging.info(str_bfr.getvalue()[:-1])

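# norm_penalty (used in the critic gradient penalty above) is assumed to be a
# WGAN-GP-style term pushing the per-example gradient L2-norm toward 1,
# evaluated directly at the data/adversarial points since the comment notes
# that no interpolation is done; a minimal TF1 sketch:
def norm_penalty(grad):
    reduce_ind = list(range(1, grad.get_shape().ndims))
    norms = tf.sqrt(tf.reduce_sum(grad**2, axis=reduce_ind))
    return tf.reduce_mean((norms - 1.0)**2)
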
def main(args):
    if not torch.cuda.is_available():
        raise NotImplementedError("Training on CPU is not supported.")
    utils.setup_experiment(args)
    utils.init_logging(args)

    train_loaders, valid_loaders = data.build_dataset(
        args.dataset, args.data_path, batch_size=args.batch_size)
    model = models.build_model(args).cuda()
    optimizer = optim.build_optimizer(args, model.parameters())
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters() if p.requires_grad):,} parameters"
    )

    meters = {name: utils.RunningAverageMeter(0.98)
              for name in ["loss", "context", "graph", "target"]}
    acc_names = ["overall"] + [f"task{idx}" for idx in range(len(valid_loaders))]
    acc_meters = {name: utils.AverageMeter() for name in acc_names}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    global_step = -1
    for epoch in range(args.num_epochs):
        acc_tasks = {f"task{idx}": None for idx in range(len(valid_loaders))}
        for task_id, train_loader in enumerate(train_loaders):
            for repeat in range(args.num_repeats_per_task):
                train_bar = utils.ProgressBar(train_loader, epoch,
                                              prefix=f"task {task_id}")
                for meter in meters.values():
                    meter.reset()

                for batch_id, (images, labels) in enumerate(train_bar):
                    model.train()
                    global_step += 1
                    images, labels = images.cuda(), labels.cuda()
                    outputs = model(images, labels, task_id=task_id)
                    if global_step == 0:
                        continue

                    loss = outputs["loss"]
                    model.zero_grad()
                    loss.backward()
                    optimizer.step()

                    meters["loss"].update(loss.item())
                    meters["context"].update(outputs["context_loss"].item())
                    meters["target"].update(outputs["target_loss"].item())
                    meters["graph"].update(outputs["graph_loss"].item())
                    train_bar.log(dict(**meters, lr=optimizer.get_lr()))

                    if writer is not None:
                        writer.add_scalar("loss/train", loss.item(), global_step)
                        gradients = torch.cat([
                            p.grad.view(-1) for p in model.parameters()
                            if p.grad is not None
                        ], dim=0)
                        writer.add_histogram("gradients", gradients, global_step)

            model.eval()
            for meter in acc_meters.values():
                meter.reset()
            for idx, valid_loader in enumerate(valid_loaders):
                valid_bar = utils.ProgressBar(valid_loader, epoch,
                                              prefix=f"task {task_id}")
                for batch_id, (images, labels) in enumerate(valid_bar):
                    model.eval()
                    with torch.no_grad():
                        images, labels = images.cuda(), labels.cuda()
                        outputs = model.predict(images, labels, task_id=idx)
                        correct = outputs["preds"].eq(labels).sum().item()
                        acc_meters[f"task{idx}"].update(100 * correct, n=len(images))
                acc_meters["overall"].update(acc_meters[f"task{idx}"].avg)

            acc_tasks[f"task{task_id}"] = acc_meters[f"task{task_id}"].avg
            if writer is not None:
                for name, meter in acc_meters.items():
                    writer.add_scalar(f"accuracy/{name}", meter.avg, global_step)
            logging.info(
                train_bar.print(dict(**meters, **acc_meters,
                                     lr=optimizer.get_lr())))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=acc_meters["overall"].avg, mode="max")

    bwt = sum(acc_meters[task].avg - acc
              for task, acc in acc_tasks.items()) / (len(valid_loaders) - 1)
    logging.info(
        f"Done training! Final accuracy {acc_meters['overall'].avg:.4f}, "
        f"backward transfer {bwt:.4f}.")

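# utils.RunningAverageMeter(0.98), used in several of these training scripts,
# is assumed to track an exponential moving average of a scalar; a minimal
# sketch consistent with the reset()/update()/.avg usage above:
class RunningAverageMeter:
    def __init__(self, momentum=0.98):
        self.momentum = momentum
        self.reset()

    def reset(self):
        self.avg = None

    def update(self, value):
        # The first update seeds the average; later updates decay toward it.
        if self.avg is None:
            self.avg = value
        else:
            self.avg = self.momentum * self.avg + (1 - self.momentum) * value
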
def main(args):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    utils.setup_experiment(args)
    utils.init_logging(args)

    # Build data loaders, a model and an optimizer
    model = models.build_model(args).to(device)
    print(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[50, 60, 70, 80, 90, 100], gamma=0.5)
    logging.info(
        f"Built a model consisting of {sum(p.numel() for p in model.parameters()):,} parameters"
    )

    if args.resume_training:
        state_dict = utils.load_checkpoint(args, model, optimizer, scheduler)
        global_step = state_dict['last_step']
        start_epoch = int(state_dict['last_step'] /
                          (403200 / state_dict['args'].batch_size)) + 1
    else:
        global_step = -1
        start_epoch = 0

    train_loader, valid_loader, _ = data.build_dataset(
        args.dataset, args.data_path, batch_size=args.batch_size)

    # Track moving average of loss values
    train_meters = {name: utils.RunningAverageMeter(0.98)
                    for name in ["train_loss", "train_psnr", "train_ssim"]}
    valid_meters = {name: utils.AverageMeter()
                    for name in ["valid_psnr", "valid_ssim"]}
    writer = SummaryWriter(log_dir=args.experiment_dir) if not args.no_visual else None

    for epoch in range(start_epoch, args.num_epochs):
        if args.resume_training:
            if epoch % 10 == 0:
                optimizer.param_groups[0]["lr"] /= 2
                print('learning rate reduced by factor of 2')

        train_bar = utils.ProgressBar(train_loader, epoch)
        for meter in train_meters.values():
            meter.reset()

        for batch_id, inputs in enumerate(train_bar):
            model.train()
            global_step += 1
            inputs = inputs.to(device)
            noise = utils.get_noise(inputs, mode=args.noise_mode,
                                    min_noise=args.min_noise / 255.,
                                    max_noise=args.max_noise / 255.,
                                    noise_std=args.noise_std / 255.)
            noisy_inputs = noise + inputs
            outputs = model(noisy_inputs)
            loss = F.mse_loss(outputs, inputs, reduction="sum") / (inputs.size(0) * 2)

            model.zero_grad()
            loss.backward()
            optimizer.step()

            train_psnr = utils.psnr(outputs, inputs)
            train_ssim = utils.ssim(outputs, inputs)
            train_meters["train_loss"].update(loss.item())
            train_meters["train_psnr"].update(train_psnr.item())
            train_meters["train_ssim"].update(train_ssim.item())
            train_bar.log(dict(**train_meters,
                               lr=optimizer.param_groups[0]["lr"]),
                          verbose=True)

            if writer is not None and global_step % args.log_interval == 0:
                writer.add_scalar("lr", optimizer.param_groups[0]["lr"], global_step)
                writer.add_scalar("loss/train", loss.item(), global_step)
                writer.add_scalar("psnr/train", train_psnr.item(), global_step)
                writer.add_scalar("ssim/train", train_ssim.item(), global_step)
                gradients = torch.cat([
                    p.grad.view(-1) for p in model.parameters()
                    if p.grad is not None
                ], dim=0)
                writer.add_histogram("gradients", gradients, global_step)
                sys.stdout.flush()

        if epoch % args.valid_interval == 0:
            model.eval()
            for meter in valid_meters.values():
                meter.reset()

            valid_bar = utils.ProgressBar(valid_loader)
            for sample_id, sample in enumerate(valid_bar):
                with torch.no_grad():
                    sample = sample.to(device)
                    noise = utils.get_noise(
                        sample, mode='S',
                        noise_std=(args.min_noise + args.max_noise) / (2 * 255.))
                    noisy_inputs = noise + sample
                    output = model(noisy_inputs)
                    valid_psnr = utils.psnr(output, sample)
                    valid_meters["valid_psnr"].update(valid_psnr.item())
                    valid_ssim = utils.ssim(output, sample)
                    valid_meters["valid_ssim"].update(valid_ssim.item())

                    if writer is not None and sample_id < 10:
                        image = torch.cat([sample, noisy_inputs, output], dim=0)
                        image = torchvision.utils.make_grid(image.clamp(0, 1),
                                                            nrow=3,
                                                            normalize=False)
                        writer.add_image(f"valid_samples/{sample_id}", image, global_step)

            if writer is not None:
                writer.add_scalar("psnr/valid", valid_meters['valid_psnr'].avg, global_step)
                writer.add_scalar("ssim/valid", valid_meters['valid_ssim'].avg, global_step)
                sys.stdout.flush()

            logging.info(
                train_bar.print(dict(**train_meters, **valid_meters,
                                     lr=optimizer.param_groups[0]["lr"])))
            utils.save_checkpoint(args, global_step, model, optimizer,
                                  score=valid_meters["valid_psnr"].avg,
                                  mode="max")
        scheduler.step()

    logging.info(
        f"Done training! Best PSNR {utils.save_checkpoint.best_score:.3f} obtained after step {utils.save_checkpoint.best_step}."
    )

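# utils.get_noise above is assumed to draw Gaussian noise shaped like its
# input, with mode 'S' using a single fixed std and the blind mode sampling a
# std from [min_noise, max_noise] per call; a sketch under those assumptions:
import torch


def get_noise(x, mode='S', min_noise=0.0, max_noise=0.0, noise_std=25. / 255.):
    if mode == 'S':
        std = noise_std
    else:
        # Blind denoising: a random noise level per batch.
        std = min_noise + (max_noise - min_noise) * torch.rand(1).item()
    return std * torch.randn_like(x)
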
def ddpg_multiagent(env, agent1, agent2, cfg, db):
    # Get configuration
    n_episodes = cfg["Training"]["Number_episodes"]
    max_t = cfg["Training"]["Max_timesteps"]
    print_every = cfg["Training"]["Score_window"]
    starting_random = cfg["Training"]["Starting_random"]
    brain_index = cfg["Agent"]["Brain_index"]
    persist_mongodb = cfg["Training"]["Persist_mongodb"]
    success = cfg["Environment"]["Success"]

    # Initialize score lists
    scores_deque = deque(maxlen=print_every)
    scores = []

    # Create a directory to save the findings.
    # experiment_dir = setup_experiment(cfg)
    if persist_mongodb:
        experiment_id = setup_experiment(db, cfg)
    brain_name = env.brain_names[brain_index]

    # Train for n_episodes
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        n_agents = len(env_info.agents)
        states = env_info.vector_observations
        agent1.reset()
        agent2.reset()
        actions = np.zeros((n_agents, agent1.action_size))
        score = np.zeros(n_agents)
        for t in range(max_t):
            if i_episode < starting_random:
                # random warm-up actions, uniform in [-1, 1]
                actions = 2 * np.random.rand(n_agents, agent1.action_size) - 1.0
            else:
                actions[0, :] = agent1.act(states[0, :])
                actions[1, :] = agent2.act(states[1, :])
                # for i_agent in range(n_agents):
                #     actions[i_agent, :] = agent.act(states[i_agent, :])
            next_states, rewards, dones, _ = step_unity(env, actions, brain_name)
            for i_agent in range(n_agents):
                # Add the experience to both agents' replay buffers
                agent1.step(states[i_agent, :], actions[i_agent, :],
                            rewards[i_agent], next_states[i_agent, :],
                            dones[i_agent])
                agent2.step(states[i_agent, :], actions[i_agent, :],
                            rewards[i_agent], next_states[i_agent, :],
                            dones[i_agent])
            states = next_states
            score += rewards
            if np.any(dones):
                break

        scores_deque.append(score)
        scores.append(score)
        mean_score = np.vstack(scores_deque).mean(axis=0).max()
        print("\rEpisode {}\tAverage Score: {:.4f}\tNoise Modulation: {:.3f}".format(
            i_episode, mean_score, agent1.noise_modulation), end="")
        # print("\rEpisode {}\tAverage Score: {}".format(i_episode, scores_deque), end="")

        if i_episode % print_every == 0:
            # persist_experiment(experiment_dir, i_episode, agent, scores)
            if persist_mongodb:
                persist_experiment(db, experiment_id, i_episode, agent1,
                                   scores, print_every)
                persist_experiment(db, experiment_id, i_episode, agent2,
                                   scores, print_every)
            else:
                torch.save(agent1.actor_local.state_dict(),
                           f"checkpoint_actor_1_{i_episode}.pth")
                torch.save(agent1.critic_local.state_dict(),
                           f"checkpoint_critic_1_{i_episode}.pth")
                torch.save(agent2.actor_local.state_dict(),
                           f"checkpoint_actor_2_{i_episode}.pth")
                torch.save(agent2.critic_local.state_dict(),
                           f"checkpoint_critic_2_{i_episode}.pth")
            print("\rEpisode {}\tAverage Score: {:.4f}".format(i_episode, mean_score))

        if mean_score >= success:
            # This is going to be the first thing I'd be digging in the
            # database for, so here you go.
            fpath_actor_1 = "checkpoint_actor_1_winner_{}.pth".format(i_episode)
            fpath_critic_1 = "checkpoint_critic_1_winner_{}.pth".format(i_episode)
            torch.save(agent1.actor_local.state_dict(), fpath_actor_1)
            torch.save(agent1.critic_local.state_dict(), fpath_critic_1)
            fpath_actor_2 = "checkpoint_actor_2_winner_{}.pth".format(i_episode)
            fpath_critic_2 = "checkpoint_critic_2_winner_{}.pth".format(i_episode)
            torch.save(agent2.actor_local.state_dict(), fpath_actor_2)
            torch.save(agent2.critic_local.state_dict(), fpath_critic_2)
            pkl_dump(scores, "scores_winner.pkl")
            break
    return scores

def ddpg_selfplay(env, agent, cfg, db):
    # Get configuration
    n_episodes = cfg["Training"]["Number_episodes"]
    max_t = cfg["Training"]["Max_timesteps"]
    print_every = cfg["Training"]["Score_window"]
    starting_random = cfg["Training"]["Starting_random"]
    brain_index = cfg["Agent"]["Brain_index"]
    dump_agent = cfg["Training"]["Dump_agent"]
    success = cfg["Environment"]["Success"]
    persist_mongodb = cfg["Training"]["Persist_mongodb"]
    pretrained = cfg["Training"]["Pretrained"]
    agent.update_every = max_t * starting_random * 2

    # Initialize score lists
    scores_deque = deque(maxlen=print_every)
    scores = []

    # Create a directory to save the findings.
    # experiment_dir = setup_experiment(cfg)
    experiment_id = setup_experiment(db, cfg)
    print("Experiment ID: {}".format(experiment_id))
    brain_name = env.brain_names[brain_index]

    # Train for n_episodes
    for i_episode in range(1, n_episodes + 1):
        env_info = env.reset(train_mode=True)[brain_name]
        n_agents = len(env_info.agents)
        states = env_info.vector_observations
        agent.reset()
        actions = np.zeros((n_agents, agent.action_size))
        score = np.zeros(n_agents)

        if i_episode == starting_random and pretrained:
            print("Loading pre-trained weights")
            agent = load_agent_weights(agent, cfg)
            print("Pre-trained weights loaded!")
            agent.update_every = cfg["Agent"]["Update_every"]

        for t in range(max_t):
            if i_episode < starting_random:
                # random warm-up actions, uniform in [-1, 1]
                actions = 2 * np.random.rand(n_agents, agent.action_size) - 1.0
            else:
                for i_agent in range(n_agents):
                    actions[i_agent, :] = agent.act(states[i_agent, :])
            next_states, rewards, dones, _ = step_unity(env, actions, brain_name)
            for i_agent in range(n_agents):
                # Both agents' experiences go into the shared replay buffer
                agent.step(states[i_agent, :], actions[i_agent, :],
                           rewards[i_agent], next_states[i_agent, :],
                           dones[i_agent])
            states = next_states
            score += rewards
            if np.any(dones):
                break

        scores_deque.append(score)
        scores.append(score)
        mean_score = np.vstack(scores_deque).max(axis=1).mean()
        print("\rEpisode {}\tAverage Score: {:.4f}\t Score: {:.3f} {:.3f} noise {:.2f}".format(
            i_episode, mean_score, score[0], score[1], agent.noise_modulation), end="")
        # print("\rEpisode {}\tAverage Score: {}".format(i_episode, scores_deque), end="")

        if i_episode % print_every == 0:
            print("\rEpisode {}\tAverage Score: {:.4f}".format(i_episode, mean_score))
            if persist_mongodb:
                persist_experiment(db, experiment_id, i_episode, agent,
                                   scores, print_every)
            else:
                torch.save(agent.actor_local.state_dict(),
                           f"checkpoint_actor_{i_episode}.pth")
                torch.save(agent.critic_local.state_dict(),
                           f"checkpoint_critic_{i_episode}.pth")

        if i_episode % dump_agent == 0:
            # To heck with it, let's save some to disk even though I have
            # utilities to load. It's important.
            fpath_actor = "checkpoint_actor_{}.pth".format(i_episode)
            fpath_critic = "checkpoint_critic_{}.pth".format(i_episode)
            torch.save(agent.actor_local.state_dict(), fpath_actor)
            torch.save(agent.critic_local.state_dict(), fpath_critic)

        if mean_score >= success:
            # This is going to be the first thing I'd be digging in the
            # database for, so here you go.
            fpath_actor = "checkpoint_actor_winner_{}.pth".format(i_episode)
            fpath_critic = "checkpoint_critic_winner_{}.pth".format(i_episode)
            torch.save(agent.actor_local.state_dict(), fpath_actor)
            torch.save(agent.critic_local.state_dict(), fpath_critic)
            pkl_dump(scores, "scores_winner.pkl")
            break
    return scores

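# step_unity, used by both DDPG loops above, is assumed to adapt the legacy
# Unity ML-Agents API to a gym-like (next_states, rewards, dones, info)
# tuple; a minimal sketch under that assumption:
def step_unity(env, actions, brain_name):
    env_info = env.step(actions)[brain_name]
    next_states = env_info.vector_observations
    rewards = env_info.rewards
    dones = env_info.local_done
    return next_states, rewards, dones, env_info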