def get_log_normalized_dls(train, test, bs=1024):
    """Build train/validation DataLoaders from log-normalized DataFrames.

    Parameters
    ----------
    train : DataFrame
        Training data.
    test : DataFrame
        Test data.
    bs : int
        Batch size.

    Returns
    -------
    (DataLoader, DataLoader)
        Train and validation DataLoaders.
    """
    train, test = log_normalize(train, test)

    def as_float_tensor(df):
        # Convert a DataFrame to a float32 tensor for the model.
        return torch.tensor(df.values, dtype=torch.float)

    # Targets equal inputs since we are training an autoencoder.
    train_ds = TensorDataset(as_float_tensor(train), as_float_tensor(train))
    valid_ds = TensorDataset(as_float_tensor(test), as_float_tensor(test))
    train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
    return train_dl, valid_dl
def db_from_df(train, test, bs=1024):
    """Create a fastai DataBunch from train/test DataFrames.

    Targets are set equal to the inputs (autoencoder training).

    Parameters
    ----------
    train : DataFrame
        Training data.
    test : DataFrame
        Test (validation) data.
    bs : int
        Batch size.

    Returns
    -------
    DataBunch
        DataBunch wrapping the train and validation DataLoaders.
    """
    # Cast explicitly to float32: DataFrame values are typically float64,
    # which would produce double tensors that mismatch float32 model weights.
    # This matches the dtype used everywhere else in this file.
    train_ds = TensorDataset(torch.tensor(train.values, dtype=torch.float),
                             torch.tensor(train.values, dtype=torch.float))
    valid_ds = TensorDataset(torch.tensor(test.values, dtype=torch.float),
                             torch.tensor(test.values, dtype=torch.float))
    # Create DataLoaders
    train_dl, valid_dl = get_data(train_ds, valid_ds, bs=bs)
    # Return DataBunch
    return basic_data.DataBunch(train_dl, valid_dl)
# Alternative input, kept for reference:
# test = pd.read_pickle(BIN + 'processed_data/aod/scaled_all_jets_partial_test_10percent.pkl')

# Load the custom-normalized 10% train/test splits.
train = pd.read_pickle(BIN + 'processed_data/aod/custom_normalized_train_10percent')
test = pd.read_pickle(BIN + 'processed_data/aod/custom_normalized_test_10percent')

# Drop the Width and WidthPhi columns from both splits.
for df in (train, test):
    df.pop('Width')
    df.pop('WidthPhi')

bs = 4096  # batch size

# Targets equal inputs: each TensorDataset pairs the data with itself
# (autoencoder training).
train_ds = TensorDataset(
    torch.tensor(train.values, dtype=torch.float),
    torch.tensor(train.values, dtype=torch.float),
)
valid_ds = TensorDataset(
    torch.tensor(test.values, dtype=torch.float),
    torch.tensor(test.values, dtype=torch.float),
)

# Wrap the DataLoaders in a fastai DataBunch.
train_dl, valid_dl = get_data(train_ds, valid_ds, bs=bs)
db = basic_data.DataBunch(train_dl, valid_dl)

# loss_func = RMSELoss()  # alternative loss, kept for reference
loss_func = nn.MSELoss()
bn_wd = False  # don't apply weight decay to batchnorm layers
true_wd = True  # wd will be used for all optimizers

# Figures setup
plt.close('all')
unit_list = ['[GeV]', '[rad]', '[rad]', '[GeV]']
variable_list = [r'$p_T$', r'$\eta$', r'$\phi$', r'$E$']
line_style = ['--', '-']
# Filter MC jets in both pairings so each DataFrame is filtered against the
# other, then apply the custom normalization to both.
leading_df, subleading_df = utils.filter_mc_jets(leading_df, subleading_df)
subleading_df, leading_df = utils.filter_mc_jets(subleading_df, leading_df)
leading_df, subleading_df = utils.custom_normalization(leading_df, subleading_df)

# Load trained model
latent_dims = [8, 10, 12, 14, 16, 18, 20]  # latent-space sizes covered by the grid search
bs = 4096  # batch size

# Create TensorDatasets — targets equal inputs (autoencoder evaluation).
leading_ds = TensorDataset(torch.tensor(leading_df.values, dtype=torch.float), torch.tensor(leading_df.values, dtype=torch.float))
subleading_ds = TensorDataset(torch.tensor(subleading_df.values, dtype=torch.float), torch.tensor(subleading_df.values, dtype=torch.float))
# Create DataLoaders
# NOTE(review): subleading_ds is passed as BOTH train and valid, and
# leading_ds is built but never used — possibly intended to be
# get_data(leading_ds, subleading_ds, bs=bs). Confirm before relying on this.
train_dl, valid_dl = get_data(subleading_ds, subleading_ds, bs=bs)
# Return DataBunch
db = basic_data.DataBunch(train_dl, valid_dl)

module_name = 'AE_bn_LeakyReLU'
module = AE_bn_LeakyReLU
# Results folder for the 25-variable AOD grid search (custom normalization, 1500 epochs).
grid_search_folder = module_name + '_25AOD_grid_search_custom_normalization_1500epochs/'
# grid_search_folder = module_name + '_AOD_grid_search_custom_normalization_1500epochs_12D10D8D/'
# Best hyperparameter run per latent dimension; keys are latent sizes as
# strings. (Literal continues beyond this chunk of the file.)
folder_dict = {
    '20': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=20
    '18': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=18
    '16': 'AE_bn_LeakyReLU_bs4096_lr3e-02_wd1e-04_ppNA',  # z=16
    '14': 'AE_bn_LeakyReLU_bs4096_lr1e-02_wd1e-02_ppNA',  # z=14
    '12': 'AE_bn_LeakyReLU_bs4096_lr1e-03_wd1e-01_ppNA',  # z=12
    '10': 'AE_bn_LeakyReLU_bs4096_lr1e-03_wd1e-02_ppNA',  # z=10