def bulk_discretize(hdfPath, x_disc, y_disc, z_disc, charge, noise):
    """Discretizes all events in an HDF5 file using a grid geometry.

    Parameters
    ----------
    hdfPath : str
        System path to the HDF5 file whose events will be discretized.
    x_disc : int
        Number of slices in x.
    y_disc : int
        Number of slices in y.
    z_disc : int
        Number of slices in z.
    charge : bool
        Whether charge is included in the discretization.
    noise : bool
        Whether artificial noise is added to the (simulated) data before
        discretization.

    Returns
    -------
    scipy.sparse.csr_matrix
        Sparse matrix of shape (n, x_disc*y_disc*z_disc) where n is the
        number of events in the provided HDF5 file.  (The previous
        docstring said "numpy array"; the code stacks with
        sp.sparse.vstack(format='csr').)
    """
    disc_evts = []
    with pytpc.HDFDataFile(hdfPath, 'r') as f:
        # Events are addressed by integer index; iterate them directly
        # instead of the original manual while/increment loop.
        for evt_id in range(len(f)):
            curEvt = f[evt_id]
            curxyz = curEvt.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                                 return_pads=False, baseline_correction=False,
                                 cg_times=False)
            if noise:
                curxyz = add_noise(curxyz)
            if charge:
                disc_evts.append(
                    discretize_grid_charge(curxyz, x_disc, y_disc, z_disc))
            else:
                disc_evts.append(
                    discretize_grid(curxyz, x_disc, y_disc, z_disc))
            # Periodic progress report.
            if evt_id % 1000 == 0:
                print("Discretized event " + str(evt_id))
    discretized_data = sp.sparse.vstack(disc_evts, format='csr')
    print("Data discretization complete.")
    return discretized_data
def simulated(projection, data_dir, save_path, prefix):
    """Render simulated proton/carbon events into paired 128x128 images.

    For every event, two RGB images are produced: a "context" image of the
    points OUTSIDE a fixed rectangular window, and a full image of all
    points.  An 80/20 train/test split is saved to '<prefix>images.h5'
    under `save_path`.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects which coordinate is plotted on the x axis.
    data_dir : str
        Directory containing '<prefix>proton.h5' and '<prefix>carbon.h5'.
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the input and output files.
    """
    # however many pads we're trying to predict
    print('Processing data...')
    print(data_dir)
    proton_events = pytpc.HDFDataFile(os.path.join(data_dir, prefix + 'proton.h5'), 'r')
    carbon_events = pytpc.HDFDataFile(os.path.join(data_dir, prefix + 'carbon.h5'), 'r')
    # Create empty arrays to hold data
    data = []
    # Add proton events to data array (label 0; labels are unused below)
    for i, event in enumerate(proton_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=False, baseline_correction=False,
                          cg_times=False)
        data.append([xyzs, 0])
        if i % 50 == 0:
            print('Proton event ' + str(i) + ' added.')
    # Add carbon events to data array (label 1)
    for i, event in enumerate(carbon_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=False, baseline_correction=False,
                          cg_times=False)
        data.append([xyzs, 1])
        if i % 50 == 0:
            print('Carbon event ' + str(i) + ' added.')
    # Take the log of charge data (column 3).  `_l` is defined elsewhere in
    # this module -- presumably a safe log transform; TODO confirm.
    log = np.vectorize(_l)
    for event in data:
        event[0][:, 3] = log(event[0][:, 3])
    data = shuffle(data)
    # 80/20 split.  The slices share the underlying event arrays, so the
    # in-place normalization below affects both train and test.
    partition = int(len(data) * 0.8)
    train = data[:partition]
    test = data[partition:]
    # Normalize charge by the global maximum over ALL events (train + test).
    max_charge = np.array(list(map(lambda x: x[0][:, 3].max(), data))).max()
    for e in data:
        for point in e[0]:
            point[3] = point[3] / max_charge
    print('Making images...')
    # Make Training sets
    # Make numpy sets
    train_image_contexts = np.zeros((len(train), 128, 128, 3), dtype=np.uint8)
    train_images = np.zeros((len(train), 128, 128, 3), dtype=np.uint8)
    for i, event in enumerate(train):
        e = event[0]
        z = e[:, 1]
        c = e[:, 3]
        if projection == 'zy':
            x = e[:, 2]
        elif projection == 'xy':
            x = e[:, 0]
        else:
            raise ValueError('Invalid projection value.')
        # create lists for missing regions: points OUTSIDE the fixed
        # rectangular window form the "context" image
        x_c = []
        z_c = []
        c_c = []
        for j in range(len(e)):
            # insert deleting condition here
            if not (-10 <= x[j] <= 127.5 and -117.5 <= z[j] <= 20):
                x_c.append(x[j])
                z_c.append(z[j])
                c_c.append(c[j])
                # c[j] = 0
        # make image context: render to a 1in x 1in Agg canvas at dpi=128,
        # i.e. a 128x128 RGBA buffer; alpha is stripped below.
        fig = plt.figure(figsize=(1, 1), dpi=128)
        ax = fig.add_axes([0, 0, 1, 1])
        if projection == 'zy':
            ax.set_xlim(0.0, 1250.0)
        elif projection == 'xy':
            ax.set_xlim(-275.0, 275.0)
        ax.set_ylim((-275.0, 275.0))
        ax.set_axis_off()
        ax.scatter(x_c, z_c, s=0.01, c=c_c, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): relies on the private Agg renderer buffer.  The
        # rebinding of `data` here shadows the outer event list; iteration
        # is unaffected because enumerate(train) holds its own reference.
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)  # drop alpha -> RGB
        train_image_contexts[i] = data
        plt.close()
        # make image (all points)
        fig = plt.figure(figsize=(1, 1), dpi=128)
        ax = fig.add_axes([0, 0, 1, 1])
        if projection == 'zy':
            ax.set_xlim(0.0, 1250.0)
        elif projection == 'xy':
            ax.set_xlim(-275.0, 275.0)
        ax.set_ylim((-275.0, 275.0))
        ax.set_axis_off()
        ax.scatter(x, z, s=0.01, c=c, cmap='Greys')
        fig.canvas.draw()
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        train_images[i] = data
        plt.close()
    # Make Testing sets
    # Make numpy sets
    test_image_contexts = np.zeros((len(test), 128, 128, 3), dtype=np.uint8)
    test_images = np.zeros((len(test), 128, 128, 3), dtype=np.uint8)
    for i, event in enumerate(test):
        e = event[0]
        z = e[:, 1]
        c = e[:, 3]
        if projection == 'zy':
            x = e[:, 2]
        elif projection == 'xy':
            x = e[:, 0]
        else:
            raise ValueError('Invalid projection value.')
        # create lists for missing regions
        x_c = []
        z_c = []
        c_c = []
        for j in range(len(e)):
            # insert deleting condition here
            if not (-10 <= x[j] <= 127.5 and -117.5 <= z[j] <= 20):
                x_c.append(x[j])
                z_c.append(z[j])
                c_c.append(c[j])
        # make image context
        fig = plt.figure(figsize=(1, 1), dpi=128)
        ax = fig.add_axes([0, 0, 1, 1])
        if projection == 'zy':
            ax.set_xlim(0.0, 1250.0)
        elif projection == 'xy':
            ax.set_xlim(-275.0, 275.0)
        ax.set_ylim((-275.0, 275.0))
        ax.set_axis_off()
        ax.scatter(x_c, z_c, s=0.01, c=c_c, cmap='Greys')
        fig.canvas.draw()
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        test_image_contexts[i] = data
        plt.close()
        # make image
        fig = plt.figure(figsize=(1, 1), dpi=128)
        ax = fig.add_axes([0, 0, 1, 1])
        if projection == 'zy':
            ax.set_xlim(0.0, 1250.0)
        elif projection == 'xy':
            ax.set_xlim(-275.0, 275.0)
        ax.set_ylim((-275.0, 275.0))
        ax.set_axis_off()
        ax.scatter(x, z, s=0.01, c=c, cmap='Greys')
        fig.canvas.draw()
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        test_images[i] = data
        plt.close()
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    print('Saving file...')
    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('train_image_contexts', data=train_image_contexts)
    h5.create_dataset('train_images', data=train_images)
    h5.create_dataset('test_image_contexts', data=test_image_contexts)
    h5.create_dataset('test_images', data=test_images)
    # h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.close()
def simulated_unlabeled(projection, noise, data_dir, save_path, prefix, include_junk):
    """Render simulated proton/carbon (and optional junk) events into one
    unlabeled set of 128x128 RGB images and save it to '<prefix>images.h5'.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects which coordinate is plotted on the x axis.
    noise : bool
        If True, artificial noise is added to each event via dd.add_noise.
    data_dir : str
        Directory containing '<prefix>proton.h5' and '<prefix>carbon.h5'.
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the input and output files.
    include_junk : bool
        If True, one junk event is appended per proton event.
    """
    print('Processing data...')
    proton_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'proton.h5'), 'r')
    carbon_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'carbon.h5'), 'r')
    # Create empty arrays to hold data
    data = []
    # Add proton events to data array (label 0; labels unused downstream)
    for i, event in enumerate(proton_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=False, baseline_correction=False,
                          cg_times=False)
        if noise:
            # Add artificial noise
            xyzs = dd.add_noise(xyzs).astype('float32')
        data.append([xyzs, 0])
        if i % 50 == 0:
            print('Proton event ' + str(i) + ' added.')
    # Add carbon events to data array (label 1)
    for i, event in enumerate(carbon_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=False, baseline_correction=False,
                          cg_times=False)
        if noise:
            # Add artificial noise
            xyzs = dd.add_noise(xyzs).astype('float32')
        data.append([xyzs, 1])
        if i % 50 == 0:
            print('Carbon event ' + str(i) + ' added.')
    # Create junk events
    if include_junk:
        for i in range(len(proton_events)):
            # NOTE(review): np.empty is uninitialized memory; when
            # noise=False the junk event contains garbage values and the
            # log transform below may misbehave -- confirm intended.
            xyzs = np.empty([1, 4])
            if noise:
                xyzs = dd.add_noise(xyzs).astype('float32')
            data.append([xyzs, 2])
            if i % 50 == 0:
                print('Junk event ' + str(i) + ' added.')
    # Take the log of charge data (column 3)
    log = np.vectorize(_l)
    for event in data:
        event[0][:, 3] = log(event[0][:, 3])
    data = shuffle(data)
    # Normalize charge by the global maximum over all events
    max_charge = np.array(list(map(lambda x: x[0][:, 3].max(), data))).max()
    for e in data:
        for point in e[0]:
            point[3] = point[3] / max_charge
    print('Making images...')
    # Make numpy sets
    images = np.empty((len(data), 128, 128, 3), dtype=np.uint8)
    for i, event in enumerate(data):
        e = event[0]
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        else:
            raise ValueError('Invalid projection value.')
        # 1in x 1in figure at dpi=128 -> 128x128 RGBA buffer
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private Agg renderer buffer; rebinding `data`
        # shadows the event list but enumerate() already holds a reference.
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)  # drop alpha -> RGB
        images[i] = data
        plt.close()
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    print('Saving file...')
    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('images', data=images)
    h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.close()
def real_labeled(projection, data_dir, save_path, prefix):
    """Render hand-labeled real events from every run in RUNS into 128x128
    images with class targets and save an 80/20 train/test split.

    Labels come from 'run_<run>_labels.csv' ('p' -> 0, 'c' -> 1, 'j' -> 2).
    Output is written to '<prefix>images.h5' under `save_path`.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects which coordinate is plotted on the x axis.
    data_dir : str
        Directory containing 'run_<run>.h5' and 'run_<run>_labels.csv'.
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the output file.
    """
    print('Processing data...')
    data = []
    for run in RUNS:
        events_file = os.path.join(data_dir, 'run_{}.h5'.format(run))
        labels_file = os.path.join(data_dir, 'run_{}_labels.csv'.format(run))
        events = pytpc.HDFDataFile(events_file, 'r')
        labels = pd.read_csv(labels_file, sep=',')
        proton_indices = labels.loc[(labels['label'] == 'p')]['evt_id'].values
        carbon_indices = labels.loc[(labels['label'] == 'c')]['evt_id'].values
        junk_indices = labels.loc[(labels['label'] == 'j')]['evt_id'].values
        # Events are keyed by string event id in the HDF file.
        for evt_id in proton_indices:
            event = events[str(evt_id)]
            xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                              return_pads=False, baseline_correction=False,
                              cg_times=False)
            data.append([xyzs, 0])
        for evt_id in carbon_indices:
            event = events[str(evt_id)]
            xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                              return_pads=False, baseline_correction=False,
                              cg_times=False)
            data.append([xyzs, 1])
        for evt_id in junk_indices:
            event = events[str(evt_id)]
            xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                              return_pads=False, baseline_correction=False,
                              cg_times=False)
            data.append([xyzs, 2])
    # Log-transform the charge column; `_l` is defined elsewhere in this
    # module -- presumably a safe log; TODO confirm.
    log = np.vectorize(_l)
    for event in data:
        event[0][:, 3] = log(event[0][:, 3])
    # Shuffle data
    data = shuffle(data)
    # Split into train and test sets
    partition = int(len(data) * 0.8)
    train = data[:partition]
    test = data[partition:]
    # Normalize by the maximum charge of the TRAIN split only (applied to
    # both splits).
    max_charge = np.array(list(map(lambda x: x[0][:, 3].max(), train))).max()
    for e in train:
        for point in e[0]:
            point[3] = point[3] / max_charge
    for e in test:
        for point in e[0]:
            point[3] = point[3] / max_charge
    print('Making images...')
    # Make train numpy sets
    train_features = np.empty((len(train), 128, 128, 3), dtype=np.uint8)
    train_targets = np.empty((len(train), ), dtype=np.uint8)
    for i, event in enumerate(train):
        e = event[0]
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        else:
            raise ValueError('Invalid projection value.')
        # 1in x 1in at dpi=128 -> 128x128 RGBA buffer; alpha dropped below
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private Agg renderer buffer; `data` rebinding
        # shadows the event list (enumerate holds its own reference).
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        train_features[i] = data
        train_targets[i] = event[1]
        plt.close()
    # Make test numpy sets
    test_features = np.empty((len(test), 128, 128, 3), dtype=np.uint8)
    test_targets = np.empty((len(test), ), dtype=np.uint8)
    for i, event in enumerate(test):
        e = event[0]
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        else:
            raise ValueError('Invalid projection value.')
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        test_features[i] = data
        test_targets[i] = event[1]
        plt.close()
    print('Saving file...')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('train_features', data=train_features)
    h5.create_dataset('train_targets', data=train_targets)
    h5.create_dataset('test_features', data=test_features)
    h5.create_dataset('test_targets', data=test_targets)
    h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.close()
def real_unlabeled(projection, data_dir, save_path, prefix):
    """Render ALL events from every run in RUNS into one unlabeled set of
    128x128 images and save it to '<prefix>images.h5' under `save_path`.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects which coordinate is plotted on the x axis.
    data_dir : str
        Directory containing 'run_<run>.h5' files.
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the output file.
    """
    print('Processing data...')
    data = []
    for run in RUNS:
        events_file = os.path.join(data_dir, 'run_{}.h5'.format(run))
        events = pytpc.HDFDataFile(events_file, 'r')
        for event in events:
            xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                              return_pads=False, baseline_correction=False,
                              cg_times=False)
            # -1 marks "no label"
            data.append([xyzs, -1])
    # Take the log of charge data (column 3)
    log = np.vectorize(_l)
    for event in data:
        event[0][:, 3] = log(event[0][:, 3])
    # Shuffle data
    data = shuffle(data)
    # Normalize by the global maximum charge
    max_charge = np.array(list(map(lambda x: x[0][:, 3].max(), data))).max()
    for e in data:
        for point in e[0]:
            point[3] = point[3] / max_charge
    print('Making images...')
    # Make numpy sets
    images = np.empty((len(data), 128, 128, 3), dtype=np.uint8)
    for i, event in enumerate(data):
        e = event[0]
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        else:
            raise ValueError('Invalid projection value.')
        # 1in x 1in at dpi=128 -> 128x128 RGBA buffer; alpha dropped below
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private Agg renderer buffer; `data` rebinding
        # shadows the event list (enumerate holds its own reference).
        data = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        data = np.delete(data, 3, axis=2)
        images[i] = data
        plt.close()
    print('Saving file...')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('images', data=images)
    h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.close()
# Script: discretize labeled proton events from real runs on a 50x50x50 grid.
import dataDiscretization as dd
import scipy as sp
import pandas as pd
import pytpc

# Input data location and output location for the discretized grids.
data_path = '../data/real/'
disc_path = data_path + '50x50x50/'

runs = ['0130', '0210']
# Grid resolution in each dimension.
x_disc = 50
y_disc = 50
z_disc = 50

for run in runs:
    data = pytpc.HDFDataFile(data_path + "run_" + run + ".h5", 'r')
    labels = pd.read_csv(data_path + "run_" + run + "_labels.csv", sep=',')
    print("Successfully loaded data and labels for run " + str(run) + ".")
    #discretize proton events
    p_indices = labels.loc[(labels['label'] == 'p')]['evt_id'].values
    p_discEvts = []
    for evt_id in p_indices:
        curEvt = data[evt_id]
        curxyz = curEvt.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                             return_pads=False, baseline_correction=False,
                             cg_times=False)
        # NOTE(review): this chunk appears truncated -- curxyz is computed
        # but never appended to p_discEvts, and disc_path is unused; the
        # remainder of the loop is not visible in this view.
def simulated(projection, noise, data_dir, save_path, prefix):
    """Build "context" and "broken" image datasets from simulated events.

    For each simulated proton/carbon event, two images are rendered:

    * a 128x128 context image of the full track with the charge of every
      point inside a fixed rectangular window zeroed out, and
    * a 32x32 "broken" image containing only the points inside that window.

    An 80/20 train/test split of both image sets (with class targets,
    proton = 0 / carbon = 1) is written to '<prefix>images.h5'.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects which coordinate is plotted on the x axis.
    noise : bool
        Currently unused; noise injection is disabled in this variant.
    data_dir : str
        Directory containing '<prefix>proton.h5' and '<prefix>carbon.h5'.
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the input and output files.
    """
    print("We are in the simulated() function!")
    print('Processing data...')
    proton_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'proton.h5'), 'r')
    carbon_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'carbon.h5'), 'r')

    # Events paired with class labels: proton = 0, carbon = 1.
    data = []
    for i, event in enumerate(proton_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=True, baseline_correction=False,
                          cg_times=False)
        data.append([xyzs, 0])
        if i % 50 == 0:
            print('Proton event ' + str(i) + ' added.')
    for i, event in enumerate(carbon_events):
        xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                          return_pads=True, baseline_correction=False,
                          cg_times=False)
        data.append([xyzs, 1])
        if i % 50 == 0:
            print('Carbon event ' + str(i) + ' added.')

    # Log-transform the charge column (column 3).
    log = np.vectorize(_l)
    for event in data:
        event[0][:, 3] = log(event[0][:, 3])

    # Split into train and test sets
    data = shuffle(data)
    partition = int(len(data) * 0.8)
    train = data[:partition]
    test = data[partition:]

    # Normalize charge by the maximum over the TRAIN split only.
    max_charge = np.array(list(map(lambda x: x[0][:, 3].max(), train))).max()
    for e in train:
        for point in e[0]:
            point[3] = point[3] / max_charge
    for e in test:
        for point in e[0]:
            point[3] = point[3] / max_charge

    # Make train numpy sets
    train_features = np.empty((len(train), 128, 128, 3), dtype=np.uint8)
    train_targets = np.empty((len(train), ), dtype=np.uint8)
    # Make train numpy sets for the broken spiral
    train_features_broken = np.empty((len(train), 32, 32, 3), dtype=np.uint8)
    train_targets_broken = np.empty((len(train), ), dtype=np.uint8)
    for i, event in enumerate(train):
        e = event[0]
        c = e[:, 3].flatten()
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
        else:
            raise ValueError('Invalid projection value.')
        # Collect the points inside the fixed window, then zero their
        # charge so they vanish from the context image.  (flatten() copies,
        # so the event array itself is untouched.)
        x_b = []
        z_b = []
        c_b = []
        for j in range(len(x)):
            if -10 < x[j] < 127.5 and -117.5 < z[j] < 20:
                x_b.append(x[j])
                z_b.append(z[j])
                c_b.append(c[j])
                c[j] = 0
        # Context image: full spiral with the window removed (128x128).
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        img = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        img = np.delete(img, 3, axis=2)  # drop alpha channel -> RGB
        train_features[i] = img
        train_targets[i] = event[1]
        plt.close()
        # Broken image: only the windowed points (32x32).
        fig = plt.figure(figsize=(1, 1), dpi=32)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-10, 127.5)
        plt.ylim((-117.5, 20))
        plt.axis('off')
        plt.scatter(x_b, z_b, s=0.6, c=c_b, cmap='Greys')
        fig.canvas.draw()
        img_broken = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        img_broken = np.delete(img_broken, 3, axis=2)
        train_features_broken[i] = img_broken
        # BUG FIX: this previously assigned train_targets[i] a second time,
        # leaving train_targets_broken as uninitialized np.empty garbage in
        # the saved file.
        train_targets_broken[i] = event[1]
        plt.close()

    # Make test numpy sets
    test_features = np.empty((len(test), 128, 128, 3), dtype=np.uint8)
    test_targets = np.empty((len(test), ), dtype=np.uint8)
    test_features_broken = np.empty((len(test), 32, 32, 3), dtype=np.uint8)
    test_targets_broken = np.empty((len(test), ), dtype=np.uint8)
    for i, event in enumerate(test):
        e = event[0]
        if projection == 'zy':
            x = e[:, 2].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        elif projection == 'xy':
            x = e[:, 0].flatten()
            z = e[:, 1].flatten()
            c = e[:, 3].flatten()
        else:
            raise ValueError('Invalid projection value.')
        # (BUG FIX: a stray extra plt.figure() used to be created here and
        # never closed, leaking one figure per test event.)
        x_b = []
        z_b = []
        c_b = []
        for j in range(len(x)):
            if -10 < x[j] < 127.5 and -117.5 < z[j] < 20:
                x_b.append(x[j])
                z_b.append(z[j])
                c_b.append(c[j])
                c[j] = 0
        # Context image (128x128).
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(x, z, s=0.6, c=c, cmap='Greys')
        fig.canvas.draw()
        img = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        img = np.delete(img, 3, axis=2)
        test_features[i] = img
        test_targets[i] = event[1]
        plt.close()
        # Broken image (32x32).
        fig = plt.figure(figsize=(1, 1), dpi=32)
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-10, 127.5)
        plt.ylim((-117.5, 20))
        plt.axis('off')
        plt.scatter(x_b, z_b, s=0.6, c=c_b, cmap='Greys')
        fig.canvas.draw()
        img_broken = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        img_broken = np.delete(img_broken, 3, axis=2)
        test_features_broken[i] = img_broken
        test_targets_broken[i] = event[1]
        plt.close()

    if not os.path.exists(save_path):
        os.makedirs(save_path)
    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('train_features', data=train_features)
    h5.create_dataset('train_targets', data=train_targets)
    h5.create_dataset('test_features', data=test_features)
    h5.create_dataset('test_targets', data=test_targets)
    h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.create_dataset('train_features_broken', data=train_features_broken)
    h5.create_dataset('train_targets_broken', data=train_targets_broken)
    h5.create_dataset('test_features_broken', data=test_features_broken)
    h5.create_dataset('test_targets_broken', data=test_targets_broken)
    h5.close()
def real_unlabeled_events(projection, save_path, prefix):
    """Extract per-pad peaks from 'corrected_run_0210.h5', merge them with
    pytpc xyz positions, and save one 128x128 image per event.

    Parameters
    ----------
    projection : str
        'zy' or 'xy'; selects the horizontal plot coordinate ('zy' plots
        peak time on x).
    save_path : str
        Output directory (created if missing).
    prefix : str
        Filename prefix for the output '<prefix>images.h5'.
    """
    print('Processing data...')
    data = []
    events = pytpc.HDFDataFile('corrected_run_0210.h5', 'r')
    for x, event in enumerate(events):
        # Get pytpc xyzs (return_pads=True -> pad number in column 4)
        xyzs = event.xyzs(peaks_only=True, return_pads=True,
                          baseline_correction=False, cg_times=False)
        # One trace per pad, indexed by pad number.
        event_trace = np.ndarray(NUMBEROFPADS, dtype=object)
        # Get the events trace across each pad
        with h5py.File('corrected_run_0210.h5', 'r') as f:
            # NOTE(review): dataset index is x + 1 -- presumably event ids
            # in the 'get' group are 1-based; confirm against the file.
            dset = f['get/' + str(x + 1)]
            trace = dset[:, 10:510]  #works better when you exclude start and end
            TRACELENGTH = len(trace[0])
            for i in range(NUMBEROFPADS):
                y = np.zeros(TRACELENGTH)
                pad_number = dset[i, 4]
                current_pad = trace[i]
                # Element-by-element copy of this pad's trace.
                for j in range(TRACELENGTH):
                    y[j] = current_pad[j]
                event_trace[pad_number] = y
        hit_peaks = []
        for i in range(NUMBEROFPADS):
            trace = event_trace[i]
            trace_max = np.amax(trace)
            trace_average = _average(trace)
            # Peak height above the trace's mean level.
            charge = trace_max - trace_average
            peak_time = np.argmax(trace)
            #move window across trace: a pad "hits" if any sample rises
            # more than THRESHOLD above the mean of the samples 25 time
            # buckets to either side.
            for timebucket in range(25, 475):
                firstvalue = trace[timebucket - 25]
                middlevalue = trace[timebucket]
                lastvalue = trace[timebucket + 25]
                average_first_last = (firstvalue + lastvalue) * 0.5
                peakheight = middlevalue - average_first_last
                if peakheight > THRESHOLD:
                    # time charge padnumber
                    hit_peaks.append([peak_time, charge, i])
                    break
        # NOTE(review): the result of this call is discarded -- it is a
        # no-op; hit_peaks stays a plain list.
        np.asarray(hit_peaks)
        HITPEAKSLENGTH = len(hit_peaks)
        # Merge pytpc xyzs array with correct charge values
        # (O(hits * pads) linear scan matching on pad number).
        plot_points = np.zeros(shape=(HITPEAKSLENGTH, 4))
        for i in range(HITPEAKSLENGTH):
            hitpeaks_padnum = hit_peaks[i][2]
            for j in range(NUMBEROFPADS):
                xyzs_padnum = xyzs[j][4]
                if (hitpeaks_padnum == xyzs_padnum):
                    plot_points[i][0] = xyzs[j][0]  #x
                    plot_points[i][1] = xyzs[j][1]  #y
                    plot_points[i][2] = hit_peaks[i][0]  #time
                    plot_points[i][3] = hit_peaks[i][1]  #charge
        data.append(plot_points)
    # Take the log of charge data
    log = np.vectorize(_l)
    for event in data:
        event[:, 3] = log(event[:, 3])
    # Normalize by the global maximum charge
    max_charge = np.array(list(map(lambda x: x[:, 3].max(), data))).max()
    for e in data:
        for point in e:
            point[3] = point[3] / max_charge
    print('Making images...')
    # Make numpy set
    images = np.empty((len(data), 128, 128, 3), dtype=np.uint8)
    for i, event in enumerate(data):
        e = event
        if projection == 'zy':
            x = e[:, 2]  #actually z (time)
            y = e[:, 1]
            charge = e[:, 3]
        elif projection == 'xy':
            x = e[:, 0]
            y = e[:, 1]
            charge = e[:, 3]
        else:
            raise ValueError('Invalid projection value.')
        # 1in x 1in at dpi=128 -> 128x128 RGBA buffer; alpha dropped below
        fig = plt.figure(figsize=(1, 1), dpi=128)
        if projection == 'zy':
            plt.xlim(0.0, 512)
        if projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        #cmap='gray_r' the _r inverts greyscale so high charge is black
        #while low charge is white
        plt.scatter(x, y, s=0.6, c=charge, cmap='gray_r')
        fig.canvas.draw()
        eventData = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        eventData = np.delete(eventData, 3, axis=2)
        images[i] = eventData
        plt.close()
    print('Saving file...')
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    filename = os.path.join(save_path, prefix + 'images.h5')
    #save to HDF5 file
    h5 = h5py.File(filename, 'w')
    h5.create_dataset('images', data=images)
    #h5.create_dataset('max_charge', data=np.array([max_charge]))
    h5.close()
def simulated(data_dir, save_dir, prefix, noise):
    """Discretize simulated proton, carbon, and junk events into sparse
    charge grids and save an 80/20 train/test split.

    Features are written as CSR matrices to '<prefix>train-features.npz' /
    '<prefix>test-features.npz' and targets to '<prefix>targets.h5' in
    `save_dir`.  Class codes: proton = 0, carbon = 1, junk = 2.

    Parameters
    ----------
    data_dir : str
        Directory containing '<prefix>proton.h5' and '<prefix>carbon.h5'.
    save_dir : str
        Output directory.
    prefix : str
        Filename prefix for input and output files.
    noise : bool
        If True, artificial noise is added to real simulated events; junk
        events are always pure noise regardless of this flag.
    """
    print('Starting...')
    proton_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'proton.h5'), 'r')
    carbon_events = pytpc.HDFDataFile(
        os.path.join(data_dir, prefix + 'carbon.h5'), 'r')

    samples = []

    def _collect(events, label, progress_prefix):
        # Discretize every event in `events`, tagging each with `label`.
        for idx, event in enumerate(events):
            points = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                                return_pads=False, baseline_correction=False,
                                cg_times=False)
            if noise:
                points = dd.add_noise(points).astype('float32')
            samples.append(
                [dd.discretize_grid_charge(points, X_DISC, Y_DISC, Z_DISC),
                 label])
            if idx % 50 == 0:
                print(progress_prefix + str(idx) + ' added.')

    _collect(proton_events, 0, 'Proton event ')
    _collect(carbon_events, 1, 'Carbon event ')

    # Junk events: one noise-only event per proton event.
    for idx in range(len(proton_events)):
        junk = dd.add_noise(np.empty([1, 4])).astype('float32')
        samples.append(
            [dd.discretize_grid_charge(junk, X_DISC, Y_DISC, Z_DISC), 2])
        if idx % 50 == 0:
            print('Junk event ' + str(idx) + ' added.')

    # Shuffle, then split 80/20 into train and test.
    samples = shuffle(samples)
    cut = int(len(samples) * 0.8)
    train, test = samples[:cut], samples[cut:]

    train_targets = [label for _, label in train]
    test_targets = [label for _, label in test]
    train_features = sp.sparse.vstack([feat for feat, _ in train],
                                      format='csr')
    test_features = sp.sparse.vstack([feat for feat, _ in test],
                                     format='csr')

    # Save features as .npz sparse matrices and targets to HDF5.
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}train-features.npz'.format(prefix)),
        train_features)
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}test-features.npz'.format(prefix)),
        test_features)
    h5 = h5py.File(os.path.join(save_dir, '{}targets.h5'.format(prefix)), 'w')
    h5.create_dataset('train_targets', data=train_targets)
    h5.create_dataset('test_targets', data=test_targets)
    h5.close()
def real(data_dir, save_dir, prefix):
    """Discretize hand-labeled real events from every run in RUNS and save
    an 80/20 train/test split.

    Features are written as CSR matrices to '<prefix>train-features.npz' /
    '<prefix>test-features.npz' and targets to '<prefix>targets.h5' in
    `save_dir`.  Class codes: proton = 0, carbon = 1, junk = 2.

    Parameters
    ----------
    data_dir : str
        Directory containing 'run_<run>.h5' and 'run_<run>_labels.csv'.
    save_dir : str
        Output directory.
    prefix : str
        Filename prefix for the output files.
    """
    samples = []
    # Map each label letter in the CSV to its numeric class code; iterating
    # this tuple preserves the proton -> carbon -> junk ordering per run.
    label_codes = (('p', 0), ('c', 1), ('j', 2))
    for run in RUNS:
        run_filename = os.path.join(data_dir, 'run_{}.h5'.format(run))
        labels_filename = os.path.join(data_dir,
                                       'run_{}_labels.csv'.format(run))
        events = pytpc.HDFDataFile(run_filename, 'r')
        labels = pd.read_csv(labels_filename, sep=',')
        for letter, code in label_codes:
            indices = labels.loc[(labels['label'] == letter)]['evt_id'].values
            for evt_id in indices:
                # Events are keyed by string event id in the HDF file.
                event = events[str(evt_id)]
                points = event.xyzs(peaks_only=True, drift_vel=5.2,
                                    clock=12.5, return_pads=False,
                                    baseline_correction=False,
                                    cg_times=False)
                samples.append(
                    [dd.discretize_grid_charge(points, X_DISC, Y_DISC,
                                               Z_DISC), code])

    # Shuffle, then split 80/20 into train and test.
    samples = shuffle(samples)
    cut = int(len(samples) * 0.8)
    train, test = samples[:cut], samples[cut:]

    train_targets = [label for _, label in train]
    test_targets = [label for _, label in test]
    train_features = sp.sparse.vstack([feat for feat, _ in train],
                                      format='csr')
    test_features = sp.sparse.vstack([feat for feat, _ in test],
                                     format='csr')

    # Save features as .npz sparse matrices and targets to HDF5.
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}train-features.npz'.format(prefix)),
        train_features)
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}test-features.npz'.format(prefix)),
        test_features)
    h5 = h5py.File(os.path.join(save_dir, '{}targets.h5'.format(prefix)), 'w')
    h5.create_dataset('train_targets', data=train_targets)
    h5.create_dataset('test_targets', data=test_targets)
    h5.close()
# Script: convert ten shards of simulated carbon and proton HDF5 files into
# per-event .npy representations via TpcRepresent.
repo = "/home/solli-comphys/github/attpc-classification-reproduction/"
data_repo = repo+"data/datapoints/"

import sys

# Number of output filenames allotted to each shard.
n_samples = 4001

# Pre-generated output filenames.  NOTE(review): str(1e5+i) produces
# float-formatted names such as "100000.0.npy" -- confirm this is intended.
filenames_c = [data_repo+str(1e5+i)+".npy" for i in range(int(10*n_samples))]
filenames_p = [data_repo+str(2e5+i)+".npy" for i in range(int(10*n_samples))]

# Make the project's modules importable (representation_converter lives there).
sys.path.insert(0, repo + "modules")

for i in tqdm(range(0, 10)):
    c_name = repo+"data/C_40000_tilt_largeEvts_{}.h5".format(i)
    p_name = repo+"data/p_40000_tilt_largeEvts_{}.h5".format(i)
    # Carbon shard: give the converter this shard's slice of output names.
    with pytpc.HDFDataFile(c_name, "r") as f:
        # Import inside the loop is redundant but harmless (module caching).
        from representation_converter import TpcRepresent
        convert_obj = TpcRepresent(filenames_c[i*n_samples: (i+1)*n_samples])
        # The comprehension variable `i` is scoped to the comprehension in
        # Python 3 and does not clobber the outer shard index.
        events = [f[i] for i in range(len(f))]
        convert_obj.convert(events)
    # Proton shard.
    with pytpc.HDFDataFile(p_name, "r") as f:
        from representation_converter import TpcRepresent
        convert_obj = TpcRepresent(filenames_p[i*n_samples: (i+1)*n_samples])
        events = [f[i] for i in range(len(f))]
        convert_obj.convert(events)
carbon, and junk. """ import matplotlib.pyplot as plt import scipy as sp import numpy as np import pytpc import sys sys.path.insert(0, '../modules/') import dataDiscretization as dd data_path = '../data/tilt/' plot_path = '../cnn-plots/' #proton events with pytpc.HDFDataFile(data_path + 'p_40000_tilt_largeEvts.h5', 'r') as p_sim: n_evts = len(p_sim) evt_id = 0 while (evt_id < n_evts): curEvt = p_sim[evt_id] curxyz = curEvt.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5, return_pads=False, baseline_correction=False, cg_times=False) noise_xyz = dd.addNoise(curxyz) plt.figure(figsize=(1.28, 1.28), dpi=100) plt.plot(noise_xyz[:, 2],
# Script: load one simulated carbon event and prepare a 3D scatter of its
# point cloud (x, y, z) colored by charge.
from pytpc.hdfdata import HDFDataFile
import pytpc
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
import matplotlib.pyplot as plt

repo = "/home/solli-comphys/github/attpc-classification-reproduction/"

evt = 0
with pytpc.HDFDataFile(repo + "/data/C_40000_tilt_largeEvts.h5", "r") as f:
    # Grab the fourth event.  NOTE(review): evt is used after the file is
    # closed -- assumes the event object holds its data in memory; confirm.
    evt = f[3]

space_distr = evt.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                       return_pads=False, baseline_correction=False,
                       cg_times=False)

# Columns: 0 = x, 1 = y, 2 = z, 3 = charge.
xs = space_distr[:, 0]
ys = space_distr[:, 1]
zs = space_distr[:, 2]
ch = space_distr[:, 3]

import pandas as pd

charge_series = pd.DataFrame(ch)

fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")