Exemplo n.º 1
0
def convert_vector(yaml_config, parameters, labels, parameters_df, labels_df, parameters_val, labels_val):
    """
    Flatten the particles of every jet into one fixed-length row.

    Return: (Numpy array (Events, MaxParticles * (len(parameters) - 1)), labels_val)
            labels_val is handed back unchanged.
    Input: yaml_config    | Dictionary of config options ('MaxParticles' and 'Shuffle' are read)
           parameters     | List of parameters (only its length is used)
           labels         | List of labels (unused here)
           parameters_df  | Parameters dataframe, must have a 'jet_index' column
           labels_df      | Labels dataframe, must have a 'jet_index' column
           parameters_val | Parameters array; its last column is dropped
                          | (presumably the jet_index column - TODO confirm)
           labels_val     | Labels array

    The index labels of parameters_df are used directly as row positions in
    parameters_val, so the two are assumed to be aligned - verify against the caller.
    """
    print('Converting data to vector...')

    max_particles = yaml_config['MaxParticles']
    num_features = len(parameters) - 1
    num_events = len(labels_df)

    # One row per jet, zero-initialised
    flattened = np.zeros((num_events, max_particles * num_features))

    for row in range(num_events):
        jet_id = labels_df['jet_index'].iloc[row]
        member_rows = parameters_df[parameters_df['jet_index'] == jet_id].index.values
        particles = parameters_val[member_rows, :-1]
        count = len(particles)

        if count > max_particles:
            # Too many particles: keep only the first max_particles rows
            particles = particles[:max_particles, :]
        else:
            # Too few particles: pad with all-zero rows up to max_particles
            padding = np.zeros((max_particles - count, num_features))
            particles = np.concatenate([particles, padding])

        if yaml_config['Shuffle']:
            # Shuffles the particle rows in place (padding rows included)
            np.random.shuffle(particles)

        # Row-major flattening: particle 0 features, particle 1 features, ...
        flattened[row, :] = particles.ravel()

        progress.update_progress((row + 1.) / num_events)

    return flattened, labels_val
Exemplo n.º 2
0
def convert_image(yaml_config, parameters, labels, parameters_df, labels_df, parameters_val, labels_val):
    """
    Histogram the particles of every jet into a 2D image with one channel per parameter.

    Return: (Numpy array (Events, Eta, Phi, Features), labels_val)
            labels_val is handed back unchanged.
    Input: yaml_config    | Dictionary of config options
                          | ('BinsX', 'BinsY', 'MinX', 'MaxX', 'MinY', 'MaxY' are read)
           parameters     | List of parameters; MODIFIED IN PLACE: 'eta', 'phi', 'jet_eta',
                          | 'jet_phi' and 'jet_index' are removed (ValueError if one is missing)
           labels         | List of labels (unused here)
           parameters_df  | Parameters dataframe
           labels_df      | Labels dataframe, must have a 'jet_index' column
           parameters_val | Parameters array (unused here)
           labels_val     | Labels array

    For each jet the image axes are (jet_eta - eta) and (jet_phi - phi); each remaining
    parameter is used as the histogram weight of its own channel.
    """
    print('Converting data to image...')

    n_eta_bins = yaml_config['BinsX']
    n_phi_bins = yaml_config['BinsY']
    eta_edges = np.linspace(yaml_config['MinX'], yaml_config['MaxX'], n_eta_bins + 1)
    phi_edges = np.linspace(yaml_config['MinY'], yaml_config['MaxY'], n_phi_bins + 1)

    # Coordinates and bookkeeping columns are not image channels.
    # This edits the caller's list, exactly as the original code did.
    for column in ('eta', 'phi', 'jet_eta', 'jet_phi', 'jet_index'):
        parameters.remove(column)

    n_events = len(labels_df)
    images = np.zeros((n_events, n_eta_bins, n_phi_bins, len(parameters)))

    for i in range(n_events):
        jet_id = labels_df['jet_index'].iloc[i]
        jet_rows = parameters_df[parameters_df['jet_index'] == jet_id]

        # Particle position relative to the jet axis
        d_eta = np.asarray(jet_rows['jet_eta']) - np.asarray(jet_rows['eta'])
        d_phi = np.asarray(jet_rows['jet_phi']) - np.asarray(jet_rows['phi'])

        for channel, name in enumerate(parameters):
            weights = np.asarray(jet_rows[name])
            hist, _, _ = np.histogram2d(d_eta, d_phi, weights=weights,
                                        bins=(eta_edges, phi_edges))
            images[i, :, :, channel] = hist

        progress.update_progress((i + 1.) / n_events)

    return images, labels_val
Exemplo n.º 3
0
def convert_data(tree, number=None, regression=False, puppi=False):
    """
    Particle flow inputs => Numpy structured array (one record per kept particle).

    Input: tree       | Iterable of events; tree.GetEntries() is used when number is falsy
           number     | Maximum number of events to process (optional)
           regression | If True, jets without a matched tau are skipped and the
                      | 'tau_pt', 'tau_eta', 'tau_phi' fields are added to the output
           puppi      | Forwarded to create_jets
    Return: structured array with the particle kinematics, a one-hot particle type,
            the jet seed kinematics, a running jet index and a 0/1 'classification'
            flag (1 when the jet has a matched tau).

    Only particles with ID 22, +-11, +-13, 130 or 211 are kept; anything else is skipped.
    NOTE(review): 211 (and 130) are compared without abs(), unlike 11 and 13, so an ID of
    -211 is skipped - confirm this matches the IDs produced by create_jets.
    """
    if number:
        total_num = number
    else:
        total_num = int(tree.GetEntries())

    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('energy', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('muon_ID', 'i4'),
              ('neutral_hadron_ID', 'i4'), ('charged_hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_index', 'i8'), ('classification', 'i4')]
    if regression:
        fields.extend([('tau_pt', 'f8'), ('tau_eta', 'f8'), ('tau_phi', 'f8')])

    # One growing list per output field
    columns = dict((name, []) for name, _ in fields)
    id_columns = ('photon_ID', 'electron_ID', 'muon_ID',
                  'neutral_hadron_ID', 'charged_hadron_ID')

    event_num = 0.  # float, so the progress fraction is a true division on Python 2 too
    jet_num = 0

    for event in tree:
        if event_num == total_num:
            break

        jet_candidates = create_jets(event, puppi=puppi)
        tau_candidates = create_taus(event)

        for jet, seed, tau in match_taus(jet_candidates, tau_candidates):
            if regression and (not tau):
                continue

            for part, ID in jet:
                # One-hot particle type, in the order of id_columns
                if ID == 22:
                    one_hot = (1, 0, 0, 0, 0)
                elif abs(ID) == 11:
                    one_hot = (0, 1, 0, 0, 0)
                elif abs(ID) == 13:
                    one_hot = (0, 0, 1, 0, 0)
                elif ID == 130:
                    one_hot = (0, 0, 0, 1, 0)
                elif ID == 211:
                    one_hot = (0, 0, 0, 0, 1)
                else:
                    continue

                for name, flag in zip(id_columns, one_hot):
                    columns[name].append(flag)

                # Particle kinematics; eta/phi are taken relative to the jet seed
                columns['pt'].append(part.Pt())
                columns['eta'].append(seed.Eta() - part.Eta())
                columns['phi'].append(delta_phi(seed.Phi(), part.Phi()))
                columns['energy'].append(part.E())

                # Jet seed kinematics, repeated for every particle of the jet
                columns['jet_pt'].append(seed.Pt())
                columns['jet_eta'].append(seed.Eta())
                columns['jet_phi'].append(seed.Phi())
                columns['jet_index'].append(jet_num)

                if tau:
                    if regression:
                        columns['tau_pt'].append(tau.Pt())
                        columns['tau_eta'].append(tau.Eta())
                        columns['tau_phi'].append(tau.Phi())
                    columns['classification'].append(1)
                else:
                    columns['classification'].append(0)

            jet_num += 1

        event_num += 1
        progress.update_progress(event_num / total_num)

    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]

    return data
Exemplo n.º 4
0
def plot_data(tree, number=None):
    """
    Plot tau-to-jet matching efficiencies.

    For every event each jet seed is matched to at most one not-yet-used tau
    candidate within DeltaR < 0.4. Matched taus fill the "1" histograms and all
    tau candidates fill the "2" histograms, separately for all taus, the
    highest-pt tau and the second-highest-pt tau. Each histogram pair is then
    passed to plot_hist.

    Input: tree   | Iterable of events; tree.GetEntries() is used when number is falsy
           number | Maximum number of events to process (optional)
    Return: None
    """
    event_num = 0.  # float, so event_num / total_num is a true division on Python 2 too
    if number:
        total_num = number
    else:
        total_num = int(tree.GetEntries())

    taus_matched = 0
    taus_reconstructed = 0

    # Initialize histograms: (matched, all) pairs
    hist_pt1, hist_pt2 = initialize_hist(25, 0, 500)
    hist_eta1, hist_eta2 = initialize_hist(15, -2.5, 2.5)
    hist_lpt1, hist_lpt2 = initialize_hist(25, 0, 500)
    hist_leta1, hist_leta2 = initialize_hist(15, -2.5, 2.5)
    hist_slpt1, hist_slpt2 = initialize_hist(25, 0, 500)
    hist_sleta1, hist_sleta2 = initialize_hist(15, -2.5, 2.5)

    for event in tree:
        if event_num == total_num:
            break
        jet_candidates = create_jets(event)
        tau_candidates = create_taus(event)

        # Match each jet seed to the first unused tau within DeltaR < 0.4
        used_candidates = []
        for seed, _ in jet_candidates:
            for vec in tau_candidates:
                if (seed.DeltaR(vec) < 0.4) and (vec not in used_candidates):
                    used_candidates.append(vec)
                    break

        # Fill histograms
        taus_matched += len(used_candidates)
        taus_reconstructed += len(tau_candidates)

        for vec in used_candidates:
            hist_pt1.Fill(vec.Pt())
            hist_eta1.Fill(vec.Eta())
        for vec in tau_candidates:
            hist_pt2.Fill(vec.Pt())
            hist_eta2.Fill(vec.Eta())

        # Descending pt: [0] is the leading tau, [1] the subleading one
        tau_candidates = sorted(tau_candidates, key=lambda x: x.Pt())[::-1]

        if len(tau_candidates) >= 1:
            leading = tau_candidates[0]
            hist_lpt2.Fill(leading.Pt())
            hist_leta2.Fill(leading.Eta())

            if leading in used_candidates:
                hist_lpt1.Fill(leading.Pt())
                hist_leta1.Fill(leading.Eta())

            if len(tau_candidates) >= 2:
                subleading = tau_candidates[1]
                hist_slpt2.Fill(subleading.Pt())
                hist_sleta2.Fill(subleading.Eta())

                if subleading in used_candidates:
                    hist_slpt1.Fill(subleading.Pt())
                    hist_sleta1.Fill(subleading.Eta())

        event_num += 1
        progress.update_progress(event_num / total_num)

    # Single-argument print calls behave the same on Python 2 and Python 3.
    # The two spaces reproduce the output of the former
    # "print 'Taus matched: ', n" statement exactly.
    print('Taus matched:  ' + str(taus_matched))
    print('Taus reconstructed:  ' + str(taus_reconstructed))

    # Plot histograms
    plot_hist(hist_pt1, hist_pt2, 'matching_efficiency_pt', 'pt', 'efficiency')
    plot_hist(hist_lpt1, hist_lpt2, 'matching_efficiency_leadingpt', 'pt',
              'efficiency')
    plot_hist(hist_slpt1, hist_slpt2, 'matching_efficiency_subleadingpt', 'pt',
              'efficiency')
    plot_hist(hist_eta1, hist_eta2, 'matching_efficiency_eta', 'eta',
              'efficiency')
    plot_hist(hist_leta1, hist_leta2, 'matching_efficiency_leadingeta', 'eta',
              'efficiency')
    plot_hist(hist_sleta1, hist_sleta2, 'matching_efficiency_subleadingeta',
              'eta', 'efficiency')

    return None
Exemplo n.º 5
0
def convert_data(tree, number=None):
    """
    Convert data for binary classification.

    Builds one record per kept generator-level jet particle.

    Input: tree   | Iterable of events exposing the gen* / genjet* branches;
                  | tree.GetEntries() is used when number is falsy
           number | Maximum number of events to process (optional)
    Return: Numpy structured array with the particle kinematics, a one-hot particle
            type (photon / electron / hadron), the jet kinematics, a running jet
            index and 'classification' (1 when abs(jet id) >= 4, else 0).

    A particle belongs to a jet when event.genindex[k] equals the jet's position in
    event.genjetid. Particles whose id is not 22, +-11 or |id| > 40 are skipped.
    """
    total_num = number if number else int(tree.GetEntries())

    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('et', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_et', 'f8'), ('jet_index', 'i8'), ('classification', 'i4')]

    # One growing list per output field
    columns = dict((name, []) for name, _ in fields)

    events_seen = 0.
    jets_seen = 0

    for event in tree:
        if events_seen == total_num:
            break

        for jet_idx, jet_id in enumerate(event.genjetid):
            label = 1 if abs(jet_id) >= 4 else 0

            for k, owner in enumerate(event.genindex):
                if owner != jet_idx:
                    continue

                # One-hot particle type: (photon, electron, hadron)
                pdg = event.genid[k]
                if pdg == 22:
                    flags = (1, 0, 0)
                elif abs(pdg) == 11:
                    flags = (0, 1, 0)
                elif abs(pdg) > 40:
                    flags = (0, 0, 1)
                else:
                    continue

                columns['photon_ID'].append(flags[0])
                columns['electron_ID'].append(flags[1])
                columns['hadron_ID'].append(flags[2])

                # Particle kinematics; eta/phi are taken relative to the jet
                columns['pt'].append(event.genpt[k])
                columns['eta'].append(event.genjeteta[jet_idx] - event.geneta[k])
                columns['phi'].append(delta_phi(event.genjetphi[jet_idx],
                                                event.genphi[k]))
                columns['et'].append(event.genet[k])

                # Jet kinematics, repeated for every particle of the jet
                columns['jet_pt'].append(event.genjetpt[jet_idx])
                columns['jet_eta'].append(event.genjeteta[jet_idx])
                columns['jet_phi'].append(event.genjetphi[jet_idx])
                columns['jet_et'].append(event.genjetet[jet_idx])
                columns['jet_index'].append(jets_seen)

                columns['classification'].append(label)

            jets_seen += 1

        events_seen += 1.
        progress.update_progress(events_seen / total_num)

    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]

    return data
Exemplo n.º 6
0
def _sum_tau_decay_products(event, members):
    """
    Sum the four-vectors of the jet particles that descend from a tau.

    Only particles with genstatus == 1 whose id is 11, -11, 22 or has |id| > 40
    are considered. For each one the parent chain (event.genparent) is followed
    until a tau (|id| == 15) is reached or the parent value is -2; the particle
    is used only when the walk ends on a tau.

    Input: event   | Event exposing genid, genstatus, genparent, genpt, geneta,
                   | genphi and genet
           members | Indices of the particles belonging to the jet
    Return: TLorentzVector sum, or None when no particle qualifies.
    """
    vec_sum = None

    for k in members:
        pdg = event.genid[k]
        if event.genstatus[k] != 1:
            continue
        if not ((pdg in [11, -11, 22]) or (abs(pdg) > 40)):
            continue

        # Walk up the ancestry. abs() must wrap the id itself: the previous
        # form, abs(id != 15), took abs of a boolean and therefore never
        # stopped at a tau with id -15.
        index = k
        while (event.genparent[index] != -2) and (abs(event.genid[index]) != 15):
            index = event.genparent[index]

        if abs(event.genid[index]) != 15:
            continue

        vec_eta = event.geneta[k]
        vec = TLorentzVector()
        # E = Et * cosh(eta)
        vec.SetPtEtaPhiE(event.genpt[k], vec_eta, event.genphi[k],
                         event.genet[k] * np.cosh(vec_eta))

        if vec_sum is None:
            vec_sum = TLorentzVector()
            vec_sum.SetPtEtaPhiE(0., 0., 0., 0.)
        vec_sum += vec

    return vec_sum


def convert_regression_data(tree, number=None):
    """
    Convert data for regression.

    Builds one record per kept particle of every tau jet (abs(jet id) >= 4) for
    which at least one tau decay product was found among the jet particles. The
    regression targets tau_pt / tau_eta / tau_phi / tau_energy come from the
    summed four-vector of those decay products and are repeated for every
    particle of the jet.

    Input: tree   | Iterable of events exposing the gen* / genjet* branches;
                  | tree.GetEntries() is used when number is falsy
           number | Maximum number of events to process (optional)
    Return: Numpy structured array with the particle kinematics, a one-hot particle
            type (photon / electron / hadron), the jet kinematics, a running jet
            index (counting every jet, tau or not) and the tau targets.
    """
    if number:
        total_num = number
    else:
        total_num = int(tree.GetEntries())

    fields = [('pt', 'f8'), ('eta', 'f8'), ('phi', 'f8'), ('et', 'f8'),
              ('photon_ID', 'i4'), ('electron_ID', 'i4'), ('hadron_ID', 'i4'),
              ('jet_pt', 'f8'), ('jet_eta', 'f8'), ('jet_phi', 'f8'),
              ('jet_et', 'f8'), ('jet_index', 'i8'), ('tau_pt', 'f8'),
              ('tau_eta', 'f8'), ('tau_phi', 'f8'), ('tau_energy', 'f8')]

    # One growing list per output field
    columns = dict((name, []) for name, _ in fields)

    event_num = 0.
    jet_num = 0

    for event in tree:
        if event_num == total_num:
            break

        for jet_idx, jet_id in enumerate(event.genjetid):
            if abs(jet_id) >= 4:  # consider tau jets
                members = [k for k, owner in enumerate(event.genindex)
                           if owner == jet_idx]
                tau_vec = _sum_tau_decay_products(event, members)

                if tau_vec is not None:
                    tau_pt_val = tau_vec.Pt()
                    tau_energy_val = tau_vec.E()
                    tau_eta_val = tau_vec.Eta()
                    tau_phi_val = tau_vec.Phi()

                    for k in members:
                        # One-hot particle type: (photon, electron, hadron)
                        pdg = event.genid[k]
                        if pdg == 22:
                            flags = (1, 0, 0)
                        elif abs(pdg) == 11:
                            flags = (0, 1, 0)
                        elif abs(pdg) > 40:
                            flags = (0, 0, 1)
                        else:
                            continue

                        columns['photon_ID'].append(flags[0])
                        columns['electron_ID'].append(flags[1])
                        columns['hadron_ID'].append(flags[2])

                        # Particle kinematics; eta/phi are relative to the jet
                        columns['pt'].append(event.genpt[k])
                        columns['eta'].append(event.genjeteta[jet_idx]
                                              - event.geneta[k])
                        columns['phi'].append(delta_phi(event.genjetphi[jet_idx],
                                                        event.genphi[k]))
                        columns['et'].append(event.genet[k])

                        # Jet kinematics
                        columns['jet_pt'].append(event.genjetpt[jet_idx])
                        columns['jet_eta'].append(event.genjeteta[jet_idx])
                        columns['jet_phi'].append(event.genjetphi[jet_idx])
                        columns['jet_et'].append(event.genjetet[jet_idx])
                        columns['jet_index'].append(jet_num)

                        # Tau targets
                        columns['tau_pt'].append(tau_pt_val)
                        columns['tau_eta'].append(tau_eta_val)
                        columns['tau_phi'].append(tau_phi_val)
                        columns['tau_energy'].append(tau_energy_val)

            # Counts every jet so indices stay aligned with the jet order
            jet_num += 1

        event_num += 1.
        progress.update_progress(event_num / total_num)

    data = np.zeros(len(columns['pt']), dtype=fields)
    for name, _ in fields:
        data[name] = columns[name]

    return data