Example #1
def get_log_normalized_dls(train, test, bs=1024):
    """Get lognormalized DataLoaders from train and test DataFrames.

    Parameters
    ----------
    train : DataFrame
        Training data.
    test : DataFrame
        Test data.
    bs : int
        Batch size.

    Returns
    -------
    (DataLoader, DataLoader)
        Train and test DataLoaders.

    """
    train, test = log_normalize(train, test)
    train_x = train
    test_x = test
    train_y = train_x  # y = x since we are building an AE
    test_y = test_x

    train_ds = TensorDataset(torch.tensor(train_x.values, dtype=torch.float),
                             torch.tensor(train_y.values, dtype=torch.float))
    valid_ds = TensorDataset(torch.tensor(test_x.values, dtype=torch.float),
                             torch.tensor(test_y.values, dtype=torch.float))
    train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
    return train_dl, valid_dl
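
Both examples call get_data and log_normalize helpers that are not shown on this page. A minimal sketch, assuming get_data simply wraps the datasets in DataLoaders (as in the standard PyTorch nn tutorial) and that log_normalize is a log1p transform; both bodies are assumptions, not the repo's actual code:

import numpy as np
from torch.utils.data import DataLoader


def get_data(train_ds, valid_ds, bs):
    # Wrap the TensorDatasets in DataLoaders; shuffle only the training set
    return (DataLoader(train_ds, batch_size=bs, shuffle=True),
            DataLoader(valid_ds, batch_size=bs * 2))


def log_normalize(train, test):
    # Hypothetical: compress the dynamic range of every column with log1p
    return np.log1p(train), np.log1p(test)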
Example #2
def db_from_df(train, test, bs=1024):
    """Build a DataBunch from train and test DataFrames (targets equal inputs for the AE)."""
    # Create TensorDatasets (cast to float32 to match the model parameters)
    train_ds = TensorDataset(torch.tensor(train.values, dtype=torch.float),
                             torch.tensor(train.values, dtype=torch.float))
    valid_ds = TensorDataset(torch.tensor(test.values, dtype=torch.float),
                             torch.tensor(test.values, dtype=torch.float))
    # Create DataLoaders
    train_dl, valid_dl = get_data(train_ds, valid_ds, bs=bs)
    # Return DataBunch
    return basic_data.DataBunch(train_dl, valid_dl)
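
A hypothetical call, assuming train and test already hold the normalized input DataFrames:

db = db_from_df(train, test, bs=1024)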
# test = pd.read_pickle(BIN + 'processed_data/aod/scaled_all_jets_partial_test_10percent.pkl')
train = pd.read_pickle(BIN + 'processed_data/aod/custom_normalized_train_10percent')
test = pd.read_pickle(BIN + 'processed_data/aod/custom_normalized_test_10percent')

# Remove Width and WidthPhi
train.pop('Width')
train.pop('WidthPhi')
test.pop('Width')
test.pop('WidthPhi')

bs = 4096
# Create TensorDatasets
train_ds = TensorDataset(torch.tensor(train.values, dtype=torch.float),
                         torch.tensor(train.values, dtype=torch.float))
valid_ds = TensorDataset(torch.tensor(test.values, dtype=torch.float),
                         torch.tensor(test.values, dtype=torch.float))
# Create DataLoaders
train_dl, valid_dl = get_data(train_ds, valid_ds, bs=bs)
# Return DataBunch
db = basic_data.DataBunch(train_dl, valid_dl)

# loss_func = RMSELoss()
loss_func = nn.MSELoss()
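
The commented-out RMSELoss is not defined in this excerpt; a minimal sketch of such a criterion, assuming it is a small nn.Module built on top of MSELoss, could look like this:

import torch
from torch import nn


class RMSELoss(nn.Module):
    """Root-mean-squared-error criterion (sketch)."""

    def __init__(self, eps=1e-8):
        super().__init__()
        self.mse = nn.MSELoss()
        self.eps = eps  # keeps the sqrt differentiable at zero error

    def forward(self, pred, target):
        return torch.sqrt(self.mse(pred, target) + self.eps)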

bn_wd = False  # Don't use weight decay for batchnorm layers
true_wd = True  # Use true (decoupled) weight decay rather than plain L2 regularization


# Figures setup
plt.close('all')
unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
line_style = ['--', '-']
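
A hypothetical sketch of how variable_list and unit_list might label a 2x2 figure for the four jet variables (illustration only, not code from the repo):

fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for ax, var, unit in zip(axes.flatten(), variable_list, unit_list):
    # e.g. '$p_T$ [GeV]'; line_style could distinguish input vs. reconstruction curves
    ax.set_xlabel(var + ' ' + unit)
fig.tight_layout()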
# Filter MC jets, applying the selection once in each argument order
leading_df, subleading_df = utils.filter_mc_jets(leading_df, subleading_df)
subleading_df, leading_df = utils.filter_mc_jets(subleading_df, leading_df)
leading_df, subleading_df = utils.custom_normalization(leading_df, subleading_df)

# Load trained model
latent_dims = [8, 10, 12, 14, 16, 18, 20]

bs = 4096
# Create TensorDatasets
leading_ds = TensorDataset(torch.tensor(leading_df.values, dtype=torch.float),
                           torch.tensor(leading_df.values, dtype=torch.float))
subleading_ds = TensorDataset(torch.tensor(subleading_df.values, dtype=torch.float),
                              torch.tensor(subleading_df.values, dtype=torch.float))

# Create DataLoaders (both loaders are built from the subleading-jet dataset here)
train_dl, valid_dl = get_data(subleading_ds, subleading_ds, bs=bs)
# Return DataBunch
db = basic_data.DataBunch(train_dl, valid_dl)

module_name = 'AE_bn_LeakyReLU'
module = AE_bn_LeakyReLU

grid_search_folder = module_name + '_25AOD_grid_search_custom_normalization_1500epochs/'
# grid_search_folder = module_name + '_AOD_grid_search_custom_normalization_1500epochs_12D10D8D/'
folder_dict = {
    '20': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=20
    '18': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=18
    '16': 'AE_bn_LeakyReLU_bs4096_lr3e-02_wd1e-04_ppNA',  # z=16
    '14': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=14
    '12': 'AE_bn_LeakyReLU_bs4096_lr1e-03_wd1e-01_ppNA',  # z=12
    '10': 'AE_bn_LeakyReLU_bs4096_lr1e-03_wd1e-02_ppNA',  # z=10