def applyPreprocessing(sources, num_samples, out_dir, num_processes,
                       clean_pandas=False, clean_archive=False, size=250,
                       single_list=True,
                       sort_columns=["MaxLepDeltaR"], sort_ascending=False,
                       photon_max=DEFAULT_PHOTON_MAX,
                       neutral_max=DEFAULT_NEUTRAL_MAX,
                       charged_max=DEFAULT_CHARGED_MAX,
                       ):
    """Run the final preprocessing pass over a set of source directories.

    Builds one ``ObjectProfile`` per object type, computes a stride with
    ``strideFromTargetSize``, builds the processing jobs with
    ``procsFrom_label_dir_pairs`` and hands them to ``batchAssertArchived``.
    Results are archived under ``<out_dir>/temp_archive``, which is created
    (together with any missing parents) if it does not exist yet.

    Args:
        sources: Iterable of source directory paths. The last path component
            of each entry (ignoring trailing slashes) is used as its label.
        num_samples: Forwarded to ``procsFrom_label_dir_pairs`` right after
            a start value of 0 (presumably the end of the sample range --
            confirm against that helper).
        out_dir: Directory under which ``temp_archive`` is created.
        num_processes: Forwarded to ``batchAssertArchived``.
        clean_pandas: Accepted for interface compatibility; not used here.
        clean_archive: Accepted for interface compatibility; not used here.
        size: Passed as ``megabytes`` to ``strideFromTargetSize``.
        single_list: Forwarded to ``procsFrom_label_dir_pairs``.
        sort_columns: Sort columns given to every sorted ``ObjectProfile``
            and to ``procsFrom_label_dir_pairs``.
        sort_ascending: Sort direction matching ``sort_columns``.
        photon_max: Second positional argument of the ``EFlowPhoton`` profile.
        neutral_max: Second positional argument of the
            ``EFlowNeutralHadron`` profile.
        charged_max: Second positional argument of the ``EFlowTrack`` profile.

    Returns:
        None.
    """
    # The default for sort_columns is a single shared list object. Work on a
    # private copy so nothing downstream can mutate that default (or the
    # caller's list) between calls.
    if isinstance(sort_columns, list):
        sort_columns = list(sort_columns)

    # Strip trailing slashes first: "path/to/ttbar/" must be labelled
    # "ttbar", not the empty string. The directory itself is passed on as-is.
    label_dir_pairs = [(s.rstrip("/").split("/")[-1], s) for s in sources]
    print(label_dir_pairs)

    # (name, max size, (ObjFt1, ObjFt2, ObjFt3)) for every profile that is
    # pre-sorted on PT_ET and then sorted on sort_columns.
    # NOTE: a plain "Photon" profile used to be listed here but is disabled.
    sorted_specs = [
        ("EFlowPhoton", photon_max, (-1, -1, -1)),
        ("EFlowNeutralHadron", neutral_max, (-1, -1, 1)),
        ("EFlowTrack", charged_max, (-1, 1, -1)),
        ("Electron", 8, (-1, 1, 1)),
        ("MuonTight", 8, (1, -1, -1)),
    ]
    object_profiles = []
    for name, max_size, (ft1, ft2, ft3) in sorted_specs:
        object_profiles.append(
            ObjectProfile(name, max_size,
                          pre_sort_columns=["PT_ET"],
                          pre_sort_ascending=False,
                          sort_columns=sort_columns,
                          sort_ascending=sort_ascending,
                          addColumns={"ObjFt1": ft1, "ObjFt2": ft2, "ObjFt3": ft3}))
    # MissingET is a single entry and is built without any sort arguments.
    object_profiles.append(
        ObjectProfile("MissingET", 1,
                      addColumns={"ObjFt1": 1, "ObjFt2": -1, "ObjFt3": 1}))

    temp_archive = "/".join([out_dir, 'temp_archive'])
    # Create the archive directory without an exists()/mkdir() race; an
    # OSError is only tolerated when the directory is already there.
    try:
        os.makedirs(temp_archive)
    except OSError:
        if not os.path.isdir(temp_archive):
            raise

    stride = strideFromTargetSize(object_profiles, label_dir_pairs,
                                  DEFAULT_OBSERV_TYPES, megabytes=size)
    print(stride)

    # Build the processing jobs starting at sample 0, then let
    # batchAssertArchived handle them using num_processes.
    dps = procsFrom_label_dir_pairs(0,
                                    num_samples,
                                    stride,
                                    temp_archive,
                                    label_dir_pairs,
                                    object_profiles,
                                    DEFAULT_OBSERV_TYPES,
                                    single_list=single_list,
                                    sort_columns=sort_columns,
                                    sort_ascending=sort_ascending,
                                    verbose=0)
    batchAssertArchived(dps, num_processes=num_processes)
for max_EFlow_size in [100]: #[100, 200]: object_profiles = [ #ObjectProfile("Electron",-1), # ObjectProfile("MuonTight", -1), # ObjectProfile("Photon", -1), ObjectProfile("MissingET", 1) #, # ObjectProfile("EFlowPhoton",max_EFlow_size, sort_columns=[sort_on], sort_ascending=False), # ObjectProfile("EFlowNeutralHadron",max_EFlow_size, sort_columns=[sort_on], sort_ascending=False), # ObjectProfile("EFlowTrack",max_EFlow_size, sort_columns=[sort_on], sort_ascending=False)] ] resolveProfileMaxes(object_profiles, ldp) dps, l = getGensDefaultFormat(archive_dir, (100000,20000), 120000, \ object_profiles,ldp,observ_types,megabytes=100, verbose=0) dependencies = batchAssertArchived(dps) train, num_train = l[0] val, num_val = l[1] # test, num_test = l[2] max_q_size = l[2] print("MAXQ: ", max_q_size) for name in ['lorentz', 'not_lorentz', 'control_dense']: for sphereCoords in [False]: for weight_output in [False, True]: for depth in [2, 3, 4, 5]: for width in [10, 25]: for activation in ['relu']: for dropout in [0.0]: #Weight output is really only for lorentz if (weight_output == True