def num_param(args):
    """Print the total number of trainable parameters of the GAN.

    Builds a ``Discriminator`` and a ``Generator`` from the JSON configuration
    named by ``args.config`` (resolved relative to ``CONFIG_DIR``) and prints
    the combined number of parameters that have ``requires_grad`` set.

    :param args: Namespace providing ``config`` (the configuration file name)
        and ``fc_tensorized`` (forwarded to ``Discriminator``).
    """
    # A context manager closes the config file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(CONFIG_DIR + args.config, 'r') as config_file:
        config = json.load(config_file)

    D = Discriminator(config, args.fc_tensorized)
    G = Generator(config)

    params = sum(
        p.numel()
        for model in (G, D)
        for p in model.parameters()
        if p.requires_grad
    )
    print("The model has:{} parameters".format(params))
def main(env_name, n_epochs, eval_frequency, actor_net_dim, critic_net_dim,
         dsicriminator_net_dim, lr, gamma, tau, grad_clip, batch_size,
         entropy_weight, min_buffer_size, clip, ppo_updates, expert,
         activation, value_coef, betas, max_steps, tag, record):
    """Assemble the GAIL components for one run and train them.

    A random seed is drawn, the run is registered through ``initiate_run``
    (which returns the expert and activation actually used), the environment,
    networks, agent and memory are created, and ``GAIL.update`` is executed.
    When ``record`` is truthy the epoch returned by ``GAIL.update`` is logged
    to neptune as ``best_epoch`` and the neptune run is stopped.
    """
    seed = np.random.randint(0, 1000000)
    # Kept as a function-level import, exactly where the original had it.
    import pybulletgym

    # The discriminator is updated once per GAIL update in this entry point.
    discriminator_updates = 1

    expert, activation = initiate_run(
        env_name, actor_net_dim, critic_net_dim, dsicriminator_net_dim, lr,
        gamma, tau, grad_clip, batch_size, entropy_weight, min_buffer_size,
        clip, ppo_updates, discriminator_updates, expert, activation,
        value_coef, betas, max_steps, seed, tag, record)

    env = Env(env_name)
    action_space = env.env.action_space
    actor = Actor(env, actor_net_dim, activation,
                  action_space.high, action_space.low)
    critic = Critic(env, critic_net_dim, activation)
    discriminator = Discriminator(env, dsicriminator_net_dim, lr, batch_size,
                                  activation, betas)
    agent = Agent(gamma, clip, actor, critic, lr, batch_size, grad_clip,
                  entropy_weight, value_coef, betas)
    memory = PPOMemory(gamma, tau)

    gail = GAIL(env, actor, critic, discriminator, agent, memory,
                min_buffer_size, eval_frequency, ppo_updates,
                discriminator_updates, expert, seed)
    epoch_to_best = gail.update(n_epochs, max_steps, record)

    if record:
        neptune.log_metric('best_epoch', epoch_to_best)
        neptune.stop()
def __init__(self, opt):
    """Create the generator and, when requested, the discriminator.

    :param opt: Options object; the attributes read here are ``vocab_size``,
        ``embedding_size``, ``hidden_size``, ``device``, ``adversarial`` and,
        for the discriminator, ``d_hidden_size``, ``d_linear_size`` and
        ``d_dropout``.
    """
    super(LMGan, self).__init__()
    self.opt = opt

    self.generator = Generator(
        opt.vocab_size, opt.embedding_size, opt.hidden_size, opt.device)

    # The discriminator only exists in adversarial mode; otherwise the
    # attribute is set to None.
    if opt.adversarial:
        self.discriminator = Discriminator(
            opt.hidden_size, opt.d_hidden_size, opt.d_linear_size,
            opt.d_dropout, opt.device)
    else:
        self.discriminator = None
def __init__(self, config):
    """Build the generator and discriminator around one embedding module.

    :param config: Configuration object; ``vocab_size`` and
        ``embedding_size`` are read here, and the whole object is forwarded
        to ``Generator`` and ``Discriminator``.
    """
    super(GANAEL, self).__init__()
    self.config = config

    # One embedding module is handed to both sub-networks.
    shared_embedding = nn.Embedding(config.vocab_size, config.embedding_size)
    self.generator = Generator(config, shared_embedding)
    self.discriminator = Discriminator(config, shared_embedding)

    # tied embedding: the generator's weight storage is pointed at the
    # discriminator's.
    discriminator_weight = self.discriminator.embedding.weight
    self.generator.embedding.weight.data = discriminator_weight.data
def main():
    """Train GAIL from a YAML configuration and periodically evaluate it.

    Command-line arguments are merged into the configuration, the
    environment, actor, expert and discriminator are created, and
    ``GAIL.update`` is called once per epoch. Every ``eval_frequency`` epochs
    the policy is evaluated; the parameters returned by the latest update are
    saved whenever the average reward beats the best value seen so far.
    """
    args = setup_parser()
    config = load_config_from_yaml(args.config)
    config.set_args(args)

    def setting(key):
        """Return the resolved configuration value stored under ``key``."""
        return config[key].get()

    env_name = setting('env')
    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]

    # A fresh seed per run, shared by torch and numpy.
    seed = np.random.randint(0, 100000)
    torch.manual_seed(seed)
    np.random.seed(seed)

    actor = Actor(state_dim, action_dim, setting('actor_net_dim'),
                  setting('lr'), setting('betas')).to(device)
    expert = Expert(setting('expert'), state_dim)
    discriminator = Discriminator(state_dim, action_dim,
                                  setting('discriminator_net_dim'),
                                  setting('lr'), setting('betas'),
                                  expert, actor,
                                  setting('batch_size')).to(device)
    gail = GAIL(actor, discriminator)

    # training procedure
    start_time = time.time()
    max_reward = float('-inf')
    n_epochs = setting('n_epochs')
    n_iter = setting('n_iter')
    eval_frequency = setting('eval_frequency')
    n_eval_episodes = setting('n_eval_episodes')
    max_timesteps = setting('max_timesteps')

    for epoch in range(1, n_epochs + 1):
        # update policy n_iter times
        params = gail.update(n_iter)

        if epoch % eval_frequency != 0:
            continue

        # evaluate in environment
        avg_reward = evaluate(n_eval_episodes, env, max_timesteps, gail)
        if avg_reward > max_reward:
            torch.save(params,
                       "./actor_weights" + str(env_name) + str(seed))
        max_reward = max(avg_reward, max_reward)

        elapsed = datetime.timedelta(
            seconds=round(time.time() - start_time, 0))
        print("Epoch: {}\tAvg Reward: {} in {}".format(
            epoch, avg_reward, elapsed))
# Create the log directory. exist_ok=True removes the check-then-create race
# of the former `os.path.exists` guard.
os.makedirs(log_dir, exist_ok=True)

# Keep a copy of the configuration next to the logs unless one is already
# there.
if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))):
    copy(opt.config, log_dir)

# Each model is built from its own parameter section plus the shared
# `common_params`, then moved to the first configured device.
generator = MotionTransferGenerator(
    **config['model_params']['generator_params'],
    **config['model_params']['common_params'],
    use_generator_attention=opt.use_generator_attention,
    use_dmm_attention=opt.use_dmm_attention)
generator.to(opt.device_ids[0])
if opt.verbose:
    print(generator)

discriminator = Discriminator(
    **config['model_params']['discriminator_params'],
    **config['model_params']['common_params'],
    use_attention=opt.use_discriminator_attention)
discriminator.to(opt.device_ids[0])
if opt.verbose:
    print(discriminator)

kp_detector = KPDetector(
    **config['model_params']['kp_detector_params'],
    **config['model_params']['common_params'])
kp_detector.to(opt.device_ids[0])
if opt.verbose:
    print(kp_detector)

# dataset = FramesDataset(is_train=(opt.mode == 'train'), **config['dataset_params'])
dataset = PairedFramesDataset(is_train=(opt.mode == 'train'),
                              **config['dataset_params'])
def getColumnsToDrop():
    '''Return the discriminator columns that coefficients are not computed for.

    A fresh ``Discriminator`` is created and every entry of its
    ``columns_to_retain`` is passed through
    ``modules.common_functions.processColumnName``.
    '''
    retained = Discriminator().columns_to_retain
    process_name = modules.common_functions.processColumnName
    return list(map(process_name, retained))
log_dir += ' ' + strftime("%d-%m-%y %H:%M:%S", gmtime()) if not os.path.exists(log_dir): os.makedirs(log_dir) if not os.path.exists(os.path.join(log_dir, os.path.basename(opt.config))): copy(opt.config, log_dir) generator = MotionTransferGenerator( **config['model_params']['generator_params'], **config['model_params']['common_params']) generator.to(opt.device_ids[0]) if opt.verbose: print(generator) discriminator = Discriminator( **config['model_params']['discriminator_params'], **config['model_params']['common_params']) discriminator.to(opt.device_ids[0]) if opt.verbose: print(discriminator) kp_detector = KPDetector(**config['model_params']['kp_detector_params'], **config['model_params']['common_params']) kp_detector.to(opt.device_ids[0]) if opt.verbose: print(kp_detector) dataset = FramesDataset(is_train=(opt.mode == 'train'), **config['dataset_params']) if opt.mode == 'train':
def training_process(device, nb_class_labels, model_path, result_dir, patience,
                     epochs, do_pre_train, tr_feat_path, tr_labels_path,
                     val_feat_path, val_labels_path, tr_batch_size,
                     val_batch_size, adapt_patience, adapt_epochs, d_lr,
                     tgt_lr, update_cnt, factor):
    """Implements the complete training process of the AUDASC method.

    The process is: load the pickled features/labels, obtain a non-adapted
    model and label classifier (pre-trained here or loaded from disk),
    initialise the target model from the non-adapted one, and run the
    adversarial domain adaptation step.

    :param device: The device that we will use.
    :type device: str
    :param nb_class_labels: The amount of labels for label classification.
    :type nb_class_labels: int
    :param model_path: The path of previously saved model (if any).
    :type model_path: str
    :param result_dir: The directory to save newly pre-trained model.
    :type result_dir: str
    :param patience: The patience for the pre-training step.
    :type patience: int
    :param epochs: The epochs for the pre-training step.
    :type epochs: int
    :param do_pre_train: Flag to indicate if we do pre-training.
    :type do_pre_train: bool
    :param tr_feat_path: The path for loading the training features.
    :type tr_feat_path: str
    :param tr_labels_path: The path for loading the training labels.
    :type tr_labels_path: str
    :param val_feat_path: The path for loading the validation features.
    :type val_feat_path: str
    :param val_labels_path: The path for loading the validation labels.
    :type val_labels_path: str
    :param tr_batch_size: The batch size used for pre-training.
    :type tr_batch_size: int
    :param val_batch_size: The batch size used for validation.
    :type val_batch_size: int
    :param adapt_patience: The patience for the domain adaptation step.
    :type adapt_patience: int
    :param adapt_epochs: The epochs for the domain adaptation step.
    :type adapt_epochs: int
    :param d_lr: The learning rate for the discriminator.
    :type d_lr: float
    :param tgt_lr: The learning rate for the adapted model.
    :type tgt_lr: float
    :param update_cnt: An update controller for adversarial loss.
    :type update_cnt: int
    :param factor: The coefficient that the classification loss is\
                   multiplied by.
    :type factor: int
    :raises ValueError: If the directory holding the pre-trained model\
                        does not exist.
    """
    tr_feat = device_exchange(
        file_io.load_pickled_features(tr_feat_path), device=device)
    tr_labels = device_exchange(
        file_io.load_pickled_features(tr_labels_path), device=device)
    val_feat = device_exchange(
        file_io.load_pickled_features(val_feat_path), device=device)
    val_labels = device_exchange(
        file_io.load_pickled_features(val_labels_path), device=device)

    loss_func = functional.cross_entropy

    non_adapted_cnn = Model().to(device)
    label_classifier = LabelClassifier(nb_class_labels).to(device)

    # exist_ok=True avoids the check-then-create race of an `exists` guard.
    makedirs(result_dir, exist_ok=True)

    if do_pre_train:
        state_dict_path = result_dir

        printing.info_msg('Pre-training step')

        optimizer_source = torch.optim.Adam(
            list(non_adapted_cnn.parameters()) +
            list(label_classifier.parameters()),
            lr=1e-4)

        # Only the 'A' entries of the loaded data are used for pre-training.
        pre_training.pre_training(
            model=non_adapted_cnn, label_classifier=label_classifier,
            optimizer=optimizer_source, tr_batch_size=tr_batch_size,
            val_batch_size=val_batch_size, tr_feat=tr_feat['A'],
            tr_labels=tr_labels['A'], val_feat=val_feat['A'],
            val_labels=val_labels['A'], epochs=epochs, criterion=loss_func,
            patience=patience, result_dir=state_dict_path)

        del optimizer_source
    else:
        printing.info_msg('Loading a pre-trained non-adapted model')
        state_dict_path = model_path

    if not path.exists(state_dict_path):
        raise ValueError(
            'The path for loading the pre trained model does not exist!')

    # map_location lets a checkpoint saved on one device (e.g. a GPU) be
    # loaded when running on another (e.g. a CPU-only machine).
    non_adapted_cnn.load_state_dict(torch.load(
        path.join(state_dict_path, 'non_adapted_cnn.pytorch'),
        map_location=device))
    label_classifier.load_state_dict(torch.load(
        path.join(state_dict_path, 'label_classifier.pytorch'),
        map_location=device))

    printing.info_msg('Training the Adversarial Adaptation Model')

    # The target model starts as an exact copy of the non-adapted model.
    target_cnn = Model().to(device)
    target_cnn.load_state_dict(non_adapted_cnn.state_dict())
    discriminator = Discriminator(2).to(device)

    target_model_opt = torch.optim.Adam(target_cnn.parameters(), lr=tgt_lr)
    discriminator_opt = torch.optim.Adam(discriminator.parameters(), lr=d_lr)

    # The same cross-entropy function is passed for all three loss arguments.
    domain_adaptation.domain_adaptation(
        non_adapted_cnn, target_cnn, label_classifier, discriminator,
        target_model_opt, discriminator_opt, loss_func, loss_func, loss_func,
        tr_feat, tr_labels, val_feat, val_labels, adapt_epochs, update_cnt,
        result_dir, adapt_patience, device, factor)
def train(args):
    """Train the DCGAN described by ``args`` and save models and figures.

    For every epoch the discriminator and generator are each stepped once per
    batch, the mean losses are recorded, and sample images are written for
    random and fixed noise. After the last epoch the models, the loss history
    plot and a GIF of the results are saved.

    :param args: Namespace providing ``pre_trained``, ``path_results``,
        ``lrD``, ``lrG``, ``epochs``, ``batch``, ``device``, ``save_every``,
        ``data``, ``config`` and ``fc_tensorized``.
    :return: The trained discriminator and generator as ``(D, G)``.
    """
    pre_trained = args.pre_trained
    PATH = args.path_results
    lrD = args.lrD
    lrG = args.lrG
    epochs = args.epochs
    batch_size = args.batch
    device = args.device
    save_every = args.save_every
    data = args.data

    # A context manager closes the config file instead of leaking the handle.
    with open(CONFIG_DIR + args.config, 'r') as config_file:
        config = json.load(config_file)

    TT = args.fc_tensorized
    print(TT)

    # Create directory for results, then the directory for this specific run
    # and its image sub-directories. makedirs(exist_ok=True) is race-free and
    # also creates missing parent directories.
    os.makedirs(PATH, exist_ok=True)
    run_dir_template = "/{}_ttfc" if TT else "/{}"
    PATH = PATH + run_dir_template.format(config["id"])
    for sub_dir in ('', '/Random_results', '/Fixed_results'):
        os.makedirs(PATH + sub_dir, exist_ok=True)

    print("### Loading data ###")
    train_loader = load_dataset(data, batch_size, is_train=True)
    print("### Loaded data ###")

    print("### Create models ###")
    D = Discriminator(config, TT).to(device)
    G = Generator(config).to(device)

    # Count trainable parameters of both networks.
    params = sum(
        p.numel()
        for model in (D, G)
        for p in model.parameters()
        if p.requires_grad
    )
    print("The model has:{} parameters".format(params))

    if pre_trained:
        D.encoder.load()
        G.decoder.load()

    G_optimizer = optim.Adam(G.parameters(), lr=lrG, betas=(0.5, 0.999))
    D_optimizer = optim.Adam(D.parameters(), lr=lrD, betas=(0.5, 0.999))

    train_hist = {'D_losses': [], 'G_losses': [], 'G_fix_losses': []}

    BCE_loss = nn.BCELoss()
    fixed_z_ = torch.randn((5 * 5, 100)).to(device)  # fixed noise

    for epoch in range(epochs):
        if epoch == 1 or epoch % save_every == 0:
            # Snapshot of the discriminator; it is only referenced by the
            # disabled fixed-discriminator evaluation below.
            D_test = copy.deepcopy(D)

        D_losses = []
        G_losses = []
        G_fix_losses = []

        for x, _ in train_loader:
            x = x.to(device)
            D_loss = D.train_step(x, G, D_optimizer, BCE_loss, device)
            G_loss = G.train_step(D, batch_size, G_optimizer, BCE_loss,
                                  device)
            # G_fix_loss = G.evaluate(D_test, batch_size, BCE_loss, device)

            D_losses.append(D_loss)
            G_losses.append(G_loss)
            # G_fix_losses.append(G_fix_loss)

        meanDloss = torch.mean(torch.FloatTensor(D_losses))
        meanGloss = torch.mean(torch.FloatTensor(G_losses))
        # G_fix_losses stays empty while the evaluation above is disabled,
        # so this mean is NaN.
        meanGFloss = torch.mean(torch.FloatTensor(G_fix_losses))
        train_hist['D_losses'].append(meanDloss)
        train_hist['G_losses'].append(meanGloss)
        train_hist['G_fix_losses'].append(meanGFloss)

        print(
            "[{:d}/{:d}]: loss_d: {:.3f}, loss_g: {:.3f}, loss_g_fix: {:.3f}".
            format(epoch + 1, epochs, meanDloss, meanGloss, meanGFloss))

        p = PATH + '/Random_results/MNIST_DCGAN_' + str(epoch + 1) + '.png'
        fixed_p = (PATH + '/Fixed_results/MNIST_DCGAN_' + str(epoch + 1) +
                   '.png')
        z_ = torch.randn((5 * 5, 100)).to(device)
        show_result(G, 100, fixed_z_, z_, (epoch + 1), save=True, path=p,
                    isFix=False)
        show_result(G, 100, fixed_z_, z_, (epoch + 1), save=True,
                    path=fixed_p, isFix=True)

    print("Training complete. Saving.")
    save_models(D, G, PATH, train_hist, epochs)
    show_train_hist(train_hist, save=True,
                    path=PATH + '/MNIST_DCGAN_train_hist.png')
    save_gif(PATH, epochs)

    return D, G
if __name__ == '__main__':
    print('Now processing the .csv files. This will take a while...')

    # Load the .csv files in the dataset folder into a list
    csv_files = [
        os.path.join(DATASET_PATH, name)
        for name in os.listdir(DATASET_PATH)
        if name.endswith('.csv')
    ]

    # Create the output folder; an already existing folder is fine.
    try:
        os.mkdir(OUTPUT_PATH)
    except FileExistsError:
        pass
    except OSError as e:
        # The message is built from adjacent literals so that no source
        # indentation leaks into it (the old backslash continuation did).
        print(f'Could not create a folder for the output {e}. Create the '
              'folder manually and try again.')
        # Without an output folder there is nowhere to write results, so
        # stop here instead of starting the workers.
        raise SystemExit(1)

    # Start a new thread for each .csv file and process them
    disc = Discriminator()
    threads = []
    for file in csv_files:
        proc = Thread(target=processFile,
                      args=[file, disc.columns, disc.columns_to_retain])
        threads.append(proc)
        proc.start()

    # Wait for every worker before reporting completion.
    for thread in threads:
        thread.join()

    # Print message when all tasks are complete
    print(f'Done! The new .csv files have been output to {OUTPUT_PATH}.')
def objective(trial):
    """Run one hyper-parameter trial of GAIL and return its score.

    Fixed settings are combined with values suggested by ``trial``, the run
    is registered through ``initiate_run``, and ``GAIL.update`` is executed.
    The result is logged to neptune as ``best_epoch`` and returned.

    :param trial: Trial object providing ``suggest_float``, ``suggest_int``
        and ``suggest_categorical``.
    :return: The value returned by ``GAIL.update``.
    """
    # Fixed settings.
    n_epochs = 20000
    eval_frequency = 100
    max_steps = 300
    actor_net_dim = (128, 128)
    critic_net_dim = (128, 128)
    dsicriminator_net_dim = (128, 128)

    # Searched settings; the suggestion order matches the original.
    lr = trial.suggest_float("lr", 0, 0.01)
    gamma = trial.suggest_float("gamma", 0.9, 1)
    tau = trial.suggest_float("tau", 0.9, 1)
    grad_clip = 40
    batch_size = 2**trial.suggest_int('batch_size', 4, 8)
    betas = (0.9, 0.999)
    entropy_weight = trial.suggest_float("entropy_weight", 0, 0.1)
    min_buffer_size = 2048
    clip = trial.suggest_float("clip", 0, 0.5)
    ppo_updates = trial.suggest_int('ppo_updates', 1, 20)
    expert_choice = trial.suggest_categorical("expert", ["1", "3", "10"])
    value_coef = trial.suggest_float("value_coef", 0, 1)
    activation = "tanh"
    env_name = "LunarLander-v2"
    record = True

    # The categorical choice is a string; anything other than "1" or "3"
    # maps to 10.
    expert = {"1": 1, "3": 3}.get(expert_choice, 10)

    seed = 99
    discriminator_updates = 1

    expert, activation = initiate_run(
        env_name, actor_net_dim, critic_net_dim, dsicriminator_net_dim, lr,
        gamma, tau, grad_clip, batch_size, entropy_weight, min_buffer_size,
        clip, ppo_updates, discriminator_updates, expert, activation,
        value_coef, betas, max_steps, seed, "", record)

    env = Env(env_name)
    actor = Actor(env, actor_net_dim, activation)
    critic = Critic(env, critic_net_dim, activation)
    discriminator = Discriminator(env, dsicriminator_net_dim, lr, batch_size,
                                  activation, betas)
    agent = Agent(gamma, clip, actor, critic, lr, batch_size, grad_clip,
                  entropy_weight, value_coef, betas)
    memory = PPOMemory(gamma, tau)

    gail = GAIL(env, actor, critic, discriminator, agent, memory,
                min_buffer_size, eval_frequency, ppo_updates,
                discriminator_updates, expert, seed)
    epoch_to_best = gail.update(n_epochs, max_steps, record)

    if record:
        neptune.log_metric('best_epoch', epoch_to_best)
        neptune.stop()

    sys.stdout.flush()
    return epoch_to_best