def __init__(self, standard=False, feature_subset="all"):
    """Load ``data/pi0.pkl`` and store its x/z feature sets as tensors.

    The pickle is read into a float32 array and passed through
    ``cartesian_converter`` twice (``type='x'`` and ``type='z'``). Both
    results are optionally restricted to a subset of columns and then
    stored on the instance as torch tensors.

    Args:
        standard: When truthy, ``self.standardize()`` is called after the
            tensors have been populated.
        feature_subset: The string ``"all"`` keeps every column. Any other
            value is used directly as a column indexer, i.e.
            ``x[:, feature_subset]``.

    Sets ``self.qt``, ``self.xz``, ``self.x``, ``self.xwithoutPid`` and
    ``self.z``.
    """
    # This file is not yet converted to cartesian form. The original notes
    # mention 'data/pi0_cartesian_train.pkl' as a pre-converted alternative
    # that could be loaded directly instead.
    with open('data/pi0.pkl', 'rb') as f:
        xz = np.array(pickle.load(f), dtype=np.float32)

    x = cartesian_converter(xz, type='x')
    z = cartesian_converter(xz, type='z')

    # Compare against the sentinel only when the argument is a string.
    # ``ndarray != "all"`` is an elementwise comparison (or a warning on
    # older NumPy), so using it directly in ``if`` can raise when the
    # caller passes an index array.
    keep_all_features = isinstance(feature_subset, str) and feature_subset == "all"
    if not keep_all_features:
        x = x[:, feature_subset]
        z = z[:, feature_subset]

    # The transformer is built and kept on the instance, but it is not
    # applied to x here: the tensors below hold the untransformed features.
    self.qt = self.quant_tran(x)

    self.xz = xz
    # np.array() copies, so x and xwithoutPid are independent tensors that
    # start out with identical contents.
    self.x = torch.from_numpy(np.array(x))
    self.xwithoutPid = torch.from_numpy(np.array(x))
    self.z = torch.from_numpy(np.array(z))

    if standard:
        self.standardize()
# Collect every pickled DataFrame found in ``data_path`` into a single frame
# of generated events.
# - sorted(): os.listdir() returns entries in arbitrary order; sorting makes
#   the concatenation order reproducible across machines and runs.
# - os.path.join(): plain ``data_path + name`` only works when data_path
#   already ends with a path separator.
filenames = sorted(os.listdir(data_path))
dfs = [pd.read_pickle(os.path.join(data_path, name)) for name in filenames]
df_nflow_data = pd.concat(dfs)
nflow_data_len = len(df_nflow_data.index)
print("The Generated dataset has {} events".format(nflow_data_len))

# Load the reference events and derive the x and z feature sets from the same
# underlying array.
with open('data/pi0.pkl', 'rb') as f:
    xz = np.array(pickle.load(f), dtype=np.float32)
x = cartesian_converter(xz, type='x')
z = cartesian_converter(xz, type='z')
df_test_data = pd.DataFrame(x)
df_test_data_z = pd.DataFrame(z)

# Bring the generated and reference samples to the same number of events by
# randomly down-sampling whichever one is larger.
if len(df_nflow_data) > len(df_test_data):
    df_nflow_data = df_nflow_data.sample(n=len(df_test_data))
else:
    df_test_data = df_test_data.sample(n=len(df_nflow_data))
    # Reuse the rows drawn for x instead of sampling z independently, so that
    # row i of x and row i of z still come from the same event.
    # NOTE(review): assumes x and z share the same row index (both are built
    # from the same xz array) -- confirm against cartesian_converter.
    df_test_data_z = df_test_data_z.loc[df_test_data.index]
def __init__(self, standard=False, feature_subset="all", test=False):
    """Load the spherical pi0 data set and store transformed tensors.

    Two transformers are built with ``self.quant_tran`` from the training
    file ``data/pi0_spherical_train.pkl``. They are then applied to either
    the training file or the test file, depending on ``test``, and the
    results are stored as torch tensors.

    Args:
        standard: When truthy, ``self.standardize()`` is called after the
            tensors have been populated.
        feature_subset: The string ``"all"`` keeps every column. Any other
            value is used directly as a column indexer, i.e.
            ``x[:, feature_subset]``.
        test: When truthy, load ``data/pi0_spherical_test.pkl`` instead of
            the training file. The transformers are always built from the
            training file.

    Sets ``self.qt_x``, ``self.qt_z``, ``self.x``, ``self.z`` and
    ``self.xz``.
    """
    # Compare against the sentinel only when the argument is a string.
    # ``ndarray != "all"`` is an elementwise comparison (or a warning on
    # older NumPy), so using it directly in ``if`` can raise when the
    # caller passes an index array.
    keep_all_features = isinstance(feature_subset, str) and feature_subset == "all"

    def _load_features(path):
        """Read one pickle and return ``(x, z, xz)`` with the subset applied."""
        with open(path, 'rb') as handle:
            xz = np.array(pickle.load(handle), dtype=np.float32)
        # NOTE(review): x is built with type='z' and z with type='x'. The
        # swap is applied identically to the transformer data and to the
        # loaded data, so it is preserved here -- confirm it is intentional.
        x = cartesian_converter(xz, type='z')
        z = cartesian_converter(xz, type='x')
        if not keep_all_features:
            x = x[:, feature_subset]
            z = z[:, feature_subset]
        return x, z, xz

    # The transformers are always built from the training file, including
    # when the test file is the one being served.
    qt_x, qt_z, _ = _load_features('data/pi0_spherical_train.pkl')
    self.qt_x = self.quant_tran(qt_x)
    self.qt_z = self.quant_tran(qt_z)

    if test:
        print("Test flag is enabled")
    fname = 'data/pi0_spherical_test.pkl' if test else 'data/pi0_spherical_train.pkl'
    print(fname)
    # NOTE(review): with test=False this reads the training pickle a second
    # time. Reusing the arrays loaded above would avoid that, but it is only
    # safe if quant_tran does not modify its input -- confirm before changing.
    x, z, xz = _load_features(fname)

    # Apply the transformers. The DataFrame round trip is kept from the
    # original implementation.
    # NOTE(review): if transform() already returns a 2-D ndarray, the round
    # trip is redundant -- confirm and simplify.
    x_np = pd.DataFrame(self.qt_x.transform(x)).to_numpy()
    z_np = pd.DataFrame(self.qt_z.transform(z)).to_numpy()

    self.x = torch.from_numpy(np.array(x_np))
    self.z = torch.from_numpy(np.array(z_np))
    self.xz = xz

    if standard:
        self.standardize()
from utils.utilities import split_data
from utils.utilities import cartesian_converter
import pandas as pd
import numpy as np
import pickle5 as pickle


def _main():
    """Convert ``data/pi0.pkl`` to cartesian form and write train/test pickles."""
    with open('data/pi0.pkl', 'rb') as source:
        raw_events = np.array(pickle.load(source), dtype=np.float64)

    # pi0.pkl is stored in spherical coordinates, so it has to be converted
    # to cartesian before it is split.
    # NOTE(review): the converter is called without a ``type`` argument, so
    # its default applies -- confirm that is the intended feature set.
    cartesian_frame = pd.DataFrame(cartesian_converter(raw_events))

    train_part, test_part = split_data(cartesian_frame)
    train_part.to_pickle("data/pi0_cartesian_train.pkl")
    test_part.to_pickle("data/pi0_cartesian_test.pkl")


if __name__ == "__main__":
    _main()