Example #1
def ddpg(env_id,
         timesteps,
         policy="MlpPolicy",
         log_interval=None,
         tensorboard_log=None,
         seed=None,
         load_weights=None):
    # Assumes module-level imports: gym, numpy as np, plus the project
    # helpers WandbRenderEnvCallback and save_model_weights.
    from stable_baselines import DDPG
    from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise

    env = gym.make(env_id)

    # DDPG needs a continuous action space; add temporally correlated
    # Ornstein-Uhlenbeck exploration noise over every action dimension.
    n_actions = env.action_space.shape[-1]
    param_noise = None
    action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions),
                                                sigma=0.5 * np.ones(n_actions))

    if load_weights is not None:
        model = DDPG.load(load_weights, env=env)
    else:
        model = DDPG(policy,
                     env,
                     verbose=1,
                     param_noise=param_noise,
                     action_noise=action_noise,
                     tensorboard_log=tensorboard_log)

    callback = WandbRenderEnvCallback(model_name="ddpg", env_name=env_id)

    model.learn(total_timesteps=timesteps,
                log_interval=log_interval,
                callback=callback)
    save_model_weights(model, "ddpg", env_id, policy, seed=seed, path=".")
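
A minimal call might look like the following; the environment id and step budget are illustrative assumptions, not values from the original project (DDPG requires a continuous action space, which Pendulum-v0 provides):

ddpg("Pendulum-v0", timesteps=100_000, tensorboard_log="./tb", seed=0)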
Example #2
def sac(env_id,
        timesteps,
        policy="MlpPolicy",
        log_interval=None,
        tensorboard_log=None,
        seed=None):
    # Assumes module-level imports: gym and the project helper save_model_weights.
    from stable_baselines import SAC

    env = gym.make(env_id)

    model = SAC(policy, env, verbose=1, tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=timesteps, log_interval=log_interval)

    save_model_weights(model, "sac", env_id, policy, seed)
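
SAC likewise expects a continuous action space. A hedged usage sketch with assumed arguments:

sac("Pendulum-v0", timesteps=50_000, log_interval=10)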
Example #3
def trpo(env_id,
         timesteps,
         policy="MlpPolicy",
         log_interval=None,
         tensorboard_log=None,
         seed=None):
    from stable_baselines import TRPO
    env = gym.make(env_id)

    model = TRPO(policy, env, verbose=1, tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=timesteps, log_interval=log_interval)

    save_model_weights(model, "trpo", env_id, policy, seed)
Example #4
def main():
    """
    Main entry point: iterates over all experiments in the config file,
    runs each experiment, and saves its results to external files.

    :return: None.
    """

    experiment_config_path = _parse_input()
    all_experiments = read_experiments_config(experiment_config_path)

    for experiment_name, experiment_config in all_experiments.items():
        with tf.Session() as sess:  # TensorFlow 1.x graph-mode session
            sess.run(tf.global_variables_initializer())
            results, model = perform_experiment(experiment_config)
            weights_file_name = save_model_weights(experiment_name, model)
            testing_layers_files = save_layers_logs(
                results['Layers Testing Output'], 'Testing')
            training_layers_files = save_layers_logs(
                results['Layers Training Output'], 'Training')

            results.pop('Layers Training Output')
            results.pop('Layers Testing Output')
            print("Testing Data Confusion Matrix")
            print(np.array2string(results['Confusion Matrix']))
            # Make the confusion matrix JSON-serializable before dumping.
            results['Confusion Matrix'] = str(
                results['Confusion Matrix'].tolist())
            print("Experiment Results:")
            print(json.dumps(results, indent=2, sort_keys=True))

            results_file = save_experiment_log(results, experiment_name)
            upload_to_s3([], [], [results_file], [weights_file_name],
                         testing_layers_files + training_layers_files)
Example #5
def init_ppo2(env_id,
              timesteps,
              policy="MlpPolicy",
              log_interval=None,
              tensorboard_log=None,
              seed=None):
    # Run four copies of the environment in a vectorized env for
    # on-policy rollout collection.
    multiprocess_env = make_vec_env(env_id, n_envs=4)

    model = PPO2(policy,
                 multiprocess_env,
                 verbose=1,
                 tensorboard_log=tensorboard_log)

    callback = WandbRenderEnvCallback()

    model.learn(total_timesteps=timesteps,
                log_interval=log_interval,
                callback=callback)

    save_model_weights(model, "ppo2", env_id, policy, seed, path=".")
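
Here total_timesteps counts steps summed across all four environment copies. A usage sketch with an assumed environment id:

init_ppo2("CartPole-v1", timesteps=100_000, tensorboard_log="./tb")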
Example #6
def td3(env_id,
        timesteps,
        policy="MlpPolicy",
        log_interval=None,
        tensorboard_log=None,
        seed=None):
    from stable_baselines.ddpg.noise import NormalActionNoise
    # Assumes module-level imports: gym, numpy as np, and TD3.
    env = gym.make(env_id)

    # The noise objects for TD3
    n_actions = env.action_space.shape[-1]
    action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                     sigma=0.1 * np.ones(n_actions))

    model = TD3(policy,
                env,
                action_noise=action_noise,
                verbose=1,
                tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=timesteps, log_interval=log_interval)

    save_model_weights(model, "td3", env_id, policy, seed)
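
Unlike the DDPG example above, TD3 is paired here with uncorrelated Gaussian noise (NormalActionNoise) rather than a temporally correlated Ornstein-Uhlenbeck process. A usage sketch with assumed arguments:

td3("Pendulum-v0", timesteps=100_000, seed=0)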
Example #7
def fit(model,
        train_dataset, val_dataset,
        loss_config,
        epochs=5,
        batch_size=8,
        acc_steps=1,
        weight_decay=0,
        warmup_prop=0.0,
        lr=5e-4,
        cp=False):

    best_jac = 0

    # Project utility: batches sequences of similar length together to
    # reduce padding inside each batch.
    len_sampler = utils.LenMatchBatchSampler(
        torch.utils.data.RandomSampler(train_dataset),
        batch_size=batch_size, drop_last=True)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_sampler=len_sampler, num_workers=4)

    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Adam wrapped in torchcontrib's Stochastic Weight Averaging (SWA).
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    optimizer = torchcontrib.optim.SWA(optimizer)

    swa_first_epoch = 5  # start weight averaging after this epoch

    # Linear warmup then linear decay, measured in optimizer steps
    # (gradient accumulation divides the per-epoch step count).
    n_steps = float(epochs * len(train_loader)) / float(acc_steps)
    num_warmup_steps = int(warmup_prop * n_steps)

    scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps, n_steps)

    total_steps = 0
    for epoch in range(epochs):
        model.train()
        start_time = time.time()

        optimizer.zero_grad()
        avg_loss = 0

        for step, data in enumerate(train_loader):
            total_steps += 1

            start_logits, end_logits = model(data['ids'].cuda(),
                                             data['sentiment_input'].cuda(),
                                             data['probas_start'].cuda(),
                                             data['probas_end'].cuda())

            loss = loss_fn(start_logits,
                           end_logits,
                           data['target_start'].cuda(),
                           data['target_end'].cuda(),
                           config=loss_config)

            avg_loss += loss.item() / len(train_loader)
            loss.backward()

            # Gradient accumulation: update weights every `acc_steps` batches.
            if (step + 1) % acc_steps == 0:
                optimizer.step()
                scheduler.step()
                model.zero_grad()

        model.eval()
        avg_val_loss = 0.0
        val_jac = 0.0

        if epoch >= swa_first_epoch:
            optimizer.update_swa()    # fold current weights into the SWA average
            optimizer.swap_swa_sgd()  # validate with the averaged weights

        with torch.no_grad():
            for data in val_loader:
                start_logits, end_logits = model(
                    data["ids"].cuda(),
                    data['sentiment_input'].cuda(),
                    data['probas_start'].cuda(),
                    data['probas_end'].cuda())

                loss = loss_fn(start_logits.detach(),
                               end_logits.detach(),
                               data["target_start"].cuda().detach(),
                               data["target_end"].cuda().detach(),
                               config=loss_config)

                avg_val_loss += loss.item() / len(val_loader)

                val_jac += utils.jaccard_from_logits_string(
                    data, start_logits, end_logits) / len(val_dataset)

        if epoch >= swa_first_epoch:
            optimizer.swap_swa_sgd()  # swap raw weights back for further training

        # Checkpoint whenever the validation Jaccard improves.
        if val_jac >= best_jac and cp:
            utils.save_model_weights(model, 'checkpoint.pt', verbose=0)
            best_jac = val_jac

        dt = time.time() - start_time
        lr = scheduler.get_lr()[0]
        print(f'Epoch {epoch + 1}/{epochs} \t lr={lr:.1e} \t t={dt:.0f}s \t',
              end='')
        print(f'loss={avg_loss:.3f} \t val_loss={avg_val_loss:.3f} \t val_jaccard={val_jac:.4f}')

    del loss, data, avg_val_loss, avg_loss, train_loader, val_loader
    torch.cuda.empty_cache()
    gc.collect()

    if epoch >= swa_first_epoch:
        optimizer.swap_swa_sgd()  # leave the averaged weights in the model

    return best_jac if cp else val_jac
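
The SWA bookkeeping above is easy to misread, so here is the bare torchcontrib pattern in isolation: a minimal sketch in which the model, optimizer, and the training/validation bodies are hypothetical placeholders.

import torch
import torchcontrib

net = torch.nn.Linear(10, 2)  # hypothetical stand-in model
base_opt = torch.optim.SGD(net.parameters(), lr=0.1)
opt = torchcontrib.optim.SWA(base_opt)  # manual-mode SWA wrapper

swa_first_epoch = 5
for epoch in range(10):
    # ... training steps calling opt.step() go here ...
    if epoch >= swa_first_epoch:
        opt.update_swa()    # fold the current weights into the running average
        opt.swap_swa_sgd()  # swap the averaged weights in for validation
    # ... validation goes here ...
    if epoch >= swa_first_epoch:
        opt.swap_swa_sgd()  # swap the raw weights back before more training
opt.swap_swa_sgd()  # finish with the averaged weights in the model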
Example #8
def k_fold(df_train, df_test,
           X_train, X_test, preds,
           len_voc, k=5,
           fold_seed=42, model_seed=42, verbose=1,
           save=True, cp=False):
    timestamp = str(datetime.datetime.now())[:16]  # used to tag saved weights
    score = 0
    # shuffle=True is required for random_state to take effect.
    splits = list(StratifiedKFold(n_splits=k, shuffle=True,
                                  random_state=fold_seed).split(
        X=df_train, y=df_train['sentiment']))

    # Out-of-fold predictions: one (start, end) pair per training row.
    pred_oof = [[[], []] for _ in range(len(df_train))]
    pred_tests = []

    test_dataset = dataset.TweetCharDataset(df_test, X_test,
                                            preds['test_start'],
                                            preds['test_end'],
                                            max_len=config.MAX_LEN,
                                            train=False,
                                            n_models=len(config.MODELS))

    for i, (train_idx, val_idx) in enumerate(splits):
        print(f"\n-------------   Fold {i + 1}  -------------")
        utils.seed_everything(model_seed)

        model = models.Wavenet(
            len_voc,
            use_msd=config.USE_MSD,
            n_models=len(config.MODELS),
            use_bn=config.USE_BN,
            cnn_dim=config.CNN_DIM,
            proba_cnn_dim=config.PROBA_CNN_DIM,
            char_embed_dim=config.CHAR_EMBED_DIM,
            sent_embed_dim=config.SENT_EMBED_DIM,
            kernel_size=config.KERNEL_SIZE,
        ).cuda()
        model.zero_grad()

        train_dataset = dataset.TweetCharDataset(df_train.iloc[train_idx],
                                                 X_train[train_idx],
                                                 preds['oof_start'][train_idx],
                                                 preds['oof_end'][train_idx],
                                                 max_len=config.MAX_LEN,
                                                 n_models=len(config.MODELS))

        val_dataset = dataset.TweetCharDataset(df_train.iloc[val_idx],
                                               X_train[val_idx],
                                               preds['oof_start'][val_idx],
                                               preds['oof_end'][val_idx],
                                               max_len=config.MAX_LEN,
                                               n_models=len(config.MODELS))

        print('\n- Training all layers: ')
        utils.unfreeze(model)
        n_parameters = utils.count_parameters(model)
        print(f'    -> {n_parameters} trainable parameters\n')

        fold_score = fit(model,
                         train_dataset,
                         val_dataset,
                         config.loss_config,
                         epochs=config.EPOCHS,
                         batch_size=config.TRAIN_BATCH_SIZE,
                         lr=config.LR,
                         warmup_prop=config.WAMUP_PROP,
                         cp=cp)

        score += fold_score / k

        print('\n- Predicting ')

        pred_val_start, pred_val_end = predict(
            model, val_dataset, batch_size=config.VALID_BATCH_SIZE)
        for j, idx in enumerate(val_idx):
            pred_oof[idx] = [pred_val_start[j], pred_val_end[j]]

        pred_test = predict(
            model, test_dataset, batch_size=config.VALID_BATCH_SIZE)
        pred_tests.append(pred_test)

        if cp:
            # Reload the best epoch's checkpoint before saving fold weights.
            utils.load_model_weights(model, "checkpoint.pt", verbose=0)
        if save:
            utils.save_model_weights(
                model,
                f'{config.selected_model}_{timestamp}_{i + 1}.pt',
                cp_folder=config.CP_PATH)

        del model, train_dataset, val_dataset
        torch.cuda.empty_cache()
        gc.collect()

    print(f'\nLocal CV Jaccard: {score:.4f}')
    return pred_oof, pred_tests
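
The returned pair is what a downstream blending step would consume: pred_oof holds one (start, end) prediction per training row, and pred_tests holds one test-set prediction per fold. A hypothetical consumer, assuming predict() returns fixed-length (start, end) probability arrays for the test set as it does for the validation set above:

import numpy as np

pred_oof, pred_tests = k_fold(df_train, df_test, X_train, X_test, preds, len_voc)
# Average the start/end probabilities across the k folds' test predictions.
mean_test_start = np.mean([start for start, end in pred_tests], axis=0)
mean_test_end = np.mean([end for start, end in pred_tests], axis=0)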