Пример #1
0
def bulk_discretize(hdfPath, x_disc, y_disc, z_disc, charge, noise):
    """Discretize every event in an HDF5 file using a grid geometry.

    Parameters
    ----------
    hdfPath : the system path to the hdf5 file to be discretized
    x_disc  : number of slices in x
    y_disc  : number of slices in y
    z_disc  : number of slices in z
    charge  : boolean; when true ``discretize_grid_charge`` is used for each
              event, otherwise ``discretize_grid``
    noise   : boolean; when true each event is passed through ``add_noise``
              before it is discretized

    Returns
    -------
    A scipy sparse matrix in CSR format obtained by vertically stacking the
    per-event discretizations (one entry per event in the file). The original
    documentation describes the shape as (n, x_disc*y_disc*z_disc) with n the
    number of events; that depends on the discretizer output, which is not
    visible here.
    """
    # The discretizer does not change between events, so select it once.
    discretize = discretize_grid_charge if charge else discretize_grid

    disc_evts = []

    with pytpc.HDFDataFile(hdfPath, 'r') as f:
        for evt_id in range(len(f)):
            curxyz = f[evt_id].xyzs(peaks_only=True,
                                    drift_vel=5.2,
                                    clock=12.5,
                                    return_pads=False,
                                    baseline_correction=False,
                                    cg_times=False)

            if noise:
                curxyz = add_noise(curxyz)

            disc_evts.append(discretize(curxyz, x_disc, y_disc, z_disc))

            # Progress report every 1000 events (including event 0).
            if evt_id % 1000 == 0:
                print("Discretized event " + str(evt_id))

    discretized_data = sp.sparse.vstack(disc_evts, format='csr')
    print("Data discretization complete.")

    return discretized_data
def _render_context_scatter(projection, xs, ys, colors):
    """Rasterise one scatter plot and return it as a uint8 RGB array.

    The figure is 1 inch square at 128 dpi with the axes filling the whole
    canvas; the alpha channel of the rendered buffer is dropped.
    """
    fig = plt.figure(figsize=(1, 1), dpi=128)
    try:
        ax = fig.add_axes([0, 0, 1, 1])
        if projection == 'zy':
            ax.set_xlim(0.0, 1250.0)
        elif projection == 'xy':
            ax.set_xlim(-275.0, 275.0)
        ax.set_ylim((-275.0, 275.0))
        ax.set_axis_off()
        ax.scatter(xs, ys, s=0.01, c=colors, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): this reads the canvas through a private attribute of
        # the renderer; kept as in the original to avoid changing the output.
        rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
    finally:
        # Close this specific figure even if rendering fails.
        plt.close(fig)
    return np.delete(rgba, 3, axis=2)


def _build_context_image_pairs(events, projection):
    """Return ``(contexts, images)`` arrays for a list of ``[xyzs, label]``.

    ``images`` shows every point of an event; ``contexts`` shows only the
    points lying outside the window -10 <= x <= 127.5, -117.5 <= z <= 20.
    """
    contexts = np.zeros((len(events), 128, 128, 3), dtype=np.uint8)
    images = np.zeros((len(events), 128, 128, 3), dtype=np.uint8)

    for i, event in enumerate(events):
        points = event[0]
        if projection == 'zy':
            x = points[:, 2]
        elif projection == 'xy':
            x = points[:, 0]
        else:
            raise ValueError('Invalid projection value.')
        z = points[:, 1]
        c = points[:, 3]

        # Points inside the window are the "missing region" of the context.
        inside = (x >= -10) & (x <= 127.5) & (z >= -117.5) & (z <= 20)
        outside = ~inside

        contexts[i] = _render_context_scatter(projection, x[outside],
                                              z[outside], c[outside])
        images[i] = _render_context_scatter(projection, x, z, c)

    return contexts, images


def simulated(projection, data_dir, save_path, prefix):
    """Build context/full image pairs from simulated proton and carbon events.

    Reads ``<prefix>proton.h5`` and ``<prefix>carbon.h5`` from ``data_dir``,
    applies ``_l`` to the charge column, shuffles, splits 80/20 into train and
    test, divides the charge column by its maximum over all events, renders
    the images and writes them to ``<save_path>/<prefix>images.h5`` as the
    datasets ``train_image_contexts``, ``train_images``,
    ``test_image_contexts`` and ``test_images``.

    Parameters
    ----------
    projection : 'zy' or 'xy'; selects which column is plotted horizontally
    data_dir   : directory containing the simulated HDF5 files
    save_path  : output directory (created if missing)
    prefix     : filename prefix shared by the input and output files

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print('Processing data...')
    print(data_dir)

    events_with_labels = []

    # label 0 = proton, label 1 = carbon
    for label, (species, tag) in enumerate((('proton', 'Proton'),
                                            ('carbon', 'Carbon'))):
        path = os.path.join(data_dir, prefix + species + '.h5')
        with pytpc.HDFDataFile(path, 'r') as events:
            for i, event in enumerate(events):
                xyzs = event.xyzs(peaks_only=True, drift_vel=5.2, clock=12.5,
                                  return_pads=False,
                                  baseline_correction=False, cg_times=False)
                events_with_labels.append([xyzs, label])

                if i % 50 == 0:
                    print(tag + ' event ' + str(i) + ' added.')

    # Take the log of charge data
    log = np.vectorize(_l)
    for event in events_with_labels:
        event[0][:, 3] = log(event[0][:, 3])

    events_with_labels = shuffle(events_with_labels)
    partition = int(len(events_with_labels) * 0.8)
    train = events_with_labels[:partition]
    test = events_with_labels[partition:]

    # Normalize by the largest charge over all events. train/test hold the
    # same event objects, so they see the normalized values too.
    max_charge = np.array(
        [event[0][:, 3].max() for event in events_with_labels]).max()
    for event in events_with_labels:
        event[0][:, 3] = event[0][:, 3] / max_charge

    print('Making images...')

    train_image_contexts, train_images = _build_context_image_pairs(
        train, projection)
    test_image_contexts, test_images = _build_context_image_pairs(
        test, projection)

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    print('Saving file...')

    filename = os.path.join(save_path, prefix + 'images.h5')
    # Save to HDF5; the context manager closes the file even on failure.
    # max_charge is not stored by this variant (it was commented out).
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('train_image_contexts', data=train_image_contexts)
        h5.create_dataset('train_images', data=train_images)
        h5.create_dataset('test_image_contexts', data=test_image_contexts)
        h5.create_dataset('test_images', data=test_images)
Пример #3
0
def _render_simulated_unlabeled_image(projection, xs, ys, colors):
    """Rasterise one scatter plot (1 inch at 128 dpi) to a uint8 RGB array."""
    fig = plt.figure(figsize=(1, 1), dpi=128)
    try:
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(xs, ys, s=0.6, c=colors, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private renderer attribute, kept from the original.
        rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
    finally:
        plt.close(fig)
    # Drop the alpha channel.
    return np.delete(rgba, 3, axis=2)


def simulated_unlabeled(projection, noise, data_dir, save_path, prefix,
                        include_junk):
    """Render simulated proton/carbon (and optional junk) events to images.

    Events are read from ``<prefix>proton.h5`` and ``<prefix>carbon.h5`` in
    ``data_dir``. The charge column is passed through ``_l``, the events are
    shuffled, the charge is divided by its global maximum, and one image per
    event is written to ``<save_path>/<prefix>images.h5`` (datasets ``images``
    and ``max_charge``). Labels are tracked internally but not saved.

    Parameters
    ----------
    projection   : 'zy' or 'xy'; selects which column is plotted horizontally
    noise        : when true, ``dd.add_noise`` is applied to every event
    data_dir     : directory containing the simulated HDF5 files
    save_path    : output directory (created if missing)
    prefix       : filename prefix shared by the input and output files
    include_junk : when true, one junk event is added per proton event

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print('Processing data...')

    labelled = []

    def load_species(species, tag, label):
        """Append every event of one species; return the file's event count."""
        path = os.path.join(data_dir, prefix + species + '.h5')
        with pytpc.HDFDataFile(path, 'r') as events:
            for i, event in enumerate(events):
                xyzs = event.xyzs(peaks_only=True,
                                  drift_vel=5.2,
                                  clock=12.5,
                                  return_pads=False,
                                  baseline_correction=False,
                                  cg_times=False)

                if noise:
                    # Add artificial noise
                    xyzs = dd.add_noise(xyzs).astype('float32')

                labelled.append([xyzs, label])

                if i % 50 == 0:
                    print(tag + ' event ' + str(i) + ' added.')
            return len(events)

    n_protons = load_species('proton', 'Proton', 0)
    load_species('carbon', 'Carbon', 1)

    # Create junk events, as many as there are proton events.
    if include_junk:
        for i in range(n_protons):
            # NOTE(review): np.empty leaves this seed row uninitialised, so
            # with noise disabled the junk event is arbitrary memory and can
            # skew max_charge. Confirm whether np.zeros was intended.
            xyzs = np.empty([1, 4])
            if noise:
                xyzs = dd.add_noise(xyzs).astype('float32')
            labelled.append([xyzs, 2])

            if i % 50 == 0:
                print('Junk event ' + str(i) + ' added.')

    # Take the log of charge data
    log = np.vectorize(_l)
    for event in labelled:
        event[0][:, 3] = log(event[0][:, 3])

    labelled = shuffle(labelled)

    # Normalize by the largest charge over all events.
    max_charge = np.array([event[0][:, 3].max() for event in labelled]).max()
    for event in labelled:
        event[0][:, 3] = event[0][:, 3] / max_charge

    print('Making images...')

    images = np.empty((len(labelled), 128, 128, 3), dtype=np.uint8)

    # Column 2 (zy) or column 0 (xy) goes on the horizontal axis; column 1 is
    # always vertical and column 3 (charge) drives the colour.
    horizontal = 2 if projection == 'zy' else 0
    for i, event in enumerate(labelled):
        points = event[0]
        images[i] = _render_simulated_unlabeled_image(
            projection, points[:, horizontal], points[:, 1], points[:, 3])

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    print('Saving file...')

    filename = os.path.join(save_path, prefix + 'images.h5')

    # Save to HDF5; the context manager closes the file even on failure.
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('images', data=images)
        h5.create_dataset('max_charge', data=np.array([max_charge]))
Пример #4
0
def _render_real_labeled_image(projection, xs, ys, colors):
    """Rasterise one scatter plot (1 inch at 128 dpi) to a uint8 RGB array."""
    fig = plt.figure(figsize=(1, 1), dpi=128)
    try:
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(xs, ys, s=0.6, c=colors, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private renderer attribute, kept from the original.
        rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
    finally:
        plt.close(fig)
    # Drop the alpha channel.
    return np.delete(rgba, 3, axis=2)


def _build_real_labeled_set(events, projection):
    """Return ``(features, targets)`` arrays for a list of ``[xyzs, label]``."""
    features = np.empty((len(events), 128, 128, 3), dtype=np.uint8)
    targets = np.empty((len(events), ), dtype=np.uint8)

    # Column 2 (zy) or column 0 (xy) goes on the horizontal axis.
    horizontal = 2 if projection == 'zy' else 0
    for i, (points, label) in enumerate(events):
        features[i] = _render_real_labeled_image(
            projection, points[:, horizontal], points[:, 1], points[:, 3])
        targets[i] = label

    return features, targets


def real_labeled(projection, data_dir, save_path, prefix):
    """Render hand-labelled real events to train/test image sets.

    For every run in ``RUNS`` the events listed in ``run_<run>_labels.csv``
    are read from ``run_<run>.h5`` and tagged 0 ('p'), 1 ('c') or 2 ('j').
    The charge column is passed through ``_l``; the events are shuffled and
    split 80/20; charge is divided by the maximum found in the training part
    only; images and targets are written to
    ``<save_path>/<prefix>images.h5``.

    Parameters
    ----------
    projection : 'zy' or 'xy'; selects which column is plotted horizontally
    data_dir   : directory containing the run files and label CSVs
    save_path  : output directory (created if missing)
    prefix     : filename prefix of the output file

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print('Processing data...')
    labelled = []
    # CSV label letter -> integer target, in the order events are appended.
    label_codes = (('p', 0), ('c', 1), ('j', 2))

    for run in RUNS:
        events_file = os.path.join(data_dir, 'run_{}.h5'.format(run))
        labels_file = os.path.join(data_dir, 'run_{}_labels.csv'.format(run))

        labels = pd.read_csv(labels_file, sep=',')

        # Close each run file once its events have been extracted.
        with pytpc.HDFDataFile(events_file, 'r') as events:
            for letter, target in label_codes:
                evt_ids = labels.loc[
                    (labels['label'] == letter)]['evt_id'].values

                for evt_id in evt_ids:
                    xyzs = events[str(evt_id)].xyzs(peaks_only=True,
                                                    drift_vel=5.2,
                                                    clock=12.5,
                                                    return_pads=False,
                                                    baseline_correction=False,
                                                    cg_times=False)
                    labelled.append([xyzs, target])

    log = np.vectorize(_l)
    for event in labelled:
        event[0][:, 3] = log(event[0][:, 3])

    # Shuffle data
    labelled = shuffle(labelled)

    # Split into train and test sets
    partition = int(len(labelled) * 0.8)
    train = labelled[:partition]
    test = labelled[partition:]

    # Normalize: the scale comes from the training events only and is then
    # applied to both train and test.
    max_charge = np.array([event[0][:, 3].max() for event in train]).max()
    for event in labelled:
        event[0][:, 3] = event[0][:, 3] / max_charge

    print('Making images...')

    train_features, train_targets = _build_real_labeled_set(train, projection)
    test_features, test_targets = _build_real_labeled_set(test, projection)

    print('Saving file...')

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    filename = os.path.join(save_path, prefix + 'images.h5')

    # Save to HDF5; the context manager closes the file even on failure.
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('train_features', data=train_features)
        h5.create_dataset('train_targets', data=train_targets)
        h5.create_dataset('test_features', data=test_features)
        h5.create_dataset('test_targets', data=test_targets)
        h5.create_dataset('max_charge', data=np.array([max_charge]))
Пример #5
0
def _render_real_unlabeled_image(projection, xs, ys, colors):
    """Rasterise one scatter plot (1 inch at 128 dpi) to a uint8 RGB array."""
    fig = plt.figure(figsize=(1, 1), dpi=128)
    try:
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(-275.0, 275.0)
        plt.ylim((-275.0, 275.0))
        plt.axis('off')
        plt.scatter(xs, ys, s=0.6, c=colors, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private renderer attribute, kept from the original.
        rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
    finally:
        plt.close(fig)
    # Drop the alpha channel.
    return np.delete(rgba, 3, axis=2)


def real_unlabeled(projection, data_dir, save_path, prefix):
    """Render every event of every run in ``RUNS`` to an image.

    Events are read from ``run_<run>.h5`` in ``data_dir`` and tagged -1. The
    charge column is passed through ``_l``, the events are shuffled, charge is
    divided by its global maximum, and the images are written to
    ``<save_path>/<prefix>images.h5`` (datasets ``images`` and
    ``max_charge``).

    Parameters
    ----------
    projection : 'zy' or 'xy'; selects which column is plotted horizontally
    data_dir   : directory containing the run files
    save_path  : output directory (created if missing)
    prefix     : filename prefix of the output file

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print('Processing data...')
    unlabelled = []
    for run in RUNS:
        events_file = os.path.join(data_dir, 'run_{}.h5'.format(run))

        # Close each run file once its events have been extracted.
        with pytpc.HDFDataFile(events_file, 'r') as events:
            for event in events:
                xyzs = event.xyzs(peaks_only=True,
                                  drift_vel=5.2,
                                  clock=12.5,
                                  return_pads=False,
                                  baseline_correction=False,
                                  cg_times=False)

                unlabelled.append([xyzs, -1])

    # Take the log of charge data
    log = np.vectorize(_l)
    for event in unlabelled:
        event[0][:, 3] = log(event[0][:, 3])

    # Shuffle data
    unlabelled = shuffle(unlabelled)

    # Normalize by the largest charge over all events.
    max_charge = np.array(
        [event[0][:, 3].max() for event in unlabelled]).max()
    for event in unlabelled:
        event[0][:, 3] = event[0][:, 3] / max_charge

    print('Making images...')

    images = np.empty((len(unlabelled), 128, 128, 3), dtype=np.uint8)

    # Column 2 (zy) or column 0 (xy) goes on the horizontal axis.
    horizontal = 2 if projection == 'zy' else 0
    for i, event in enumerate(unlabelled):
        points = event[0]
        images[i] = _render_real_unlabeled_image(
            projection, points[:, horizontal], points[:, 1], points[:, 3])

    print('Saving file...')

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    filename = os.path.join(save_path, prefix + 'images.h5')

    # Save to HDF5; the context manager closes the file even on failure.
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('images', data=images)
        h5.create_dataset('max_charge', data=np.array([max_charge]))
Пример #6
0
import dataDiscretization as dd
import scipy as sp
import pandas as pd

import pytpc

# Directory holding the real-run HDF5 files and their label CSVs.
data_path = '../data/real/'
# Sub-directory named after the grid size below. It is not used in the visible
# part of this script; presumably the output location -- TODO confirm.
disc_path = data_path + '50x50x50/'
# Run identifiers; each maps to run_<id>.h5 and run_<id>_labels.csv.
runs = ['0130', '0210']

# Number of grid slices along each axis.
x_disc = 50
y_disc = 50
z_disc = 50

for run in runs:
    # Open the event file read-only and load the label table for this run.
    data = pytpc.HDFDataFile(data_path + "run_" + run + ".h5", 'r')
    labels = pd.read_csv(data_path + "run_" + run + "_labels.csv", sep=',')
    print("Successfully loaded data and labels for run " + str(run) + ".")

    # Discretize proton events: select the evt_id values labelled 'p'.
    p_indices = labels.loc[(labels['label'] == 'p')]['evt_id'].values
    p_discEvts = []

    for evt_id in p_indices:
        curEvt = data[evt_id]
        # NOTE(review): the excerpt ends here. curxyz is computed but never
        # discretized or appended to p_discEvts, and the event file is never
        # closed in the visible code.
        curxyz = curEvt.xyzs(peaks_only=True,
                             drift_vel=5.2,
                             clock=12.5,
                             return_pads=False,
                             baseline_correction=False,
                             cg_times=False)
def _render_spiral_scatter(projection, xs, ys, colors, dpi, xy_xlim, ylim):
    """Rasterise one scatter plot on a 1-inch figure to a uint8 RGB array.

    ``dpi`` sets the pixel size, ``xy_xlim`` is the horizontal range used for
    the 'xy' projection ('zy' always uses 0..1250) and ``ylim`` is the
    vertical range.
    """
    fig = plt.figure(figsize=(1, 1), dpi=dpi)
    try:
        if projection == 'zy':
            plt.xlim(0.0, 1250.0)
        elif projection == 'xy':
            plt.xlim(*xy_xlim)
        plt.ylim(ylim)
        plt.axis('off')
        plt.scatter(xs, ys, s=0.6, c=colors, cmap='Greys')
        fig.canvas.draw()
        # NOTE(review): private renderer attribute, kept from the original.
        rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
    finally:
        # Always release the figure; the original leaked one per test event.
        plt.close(fig)
    # Drop the alpha channel.
    return np.delete(rgba, 3, axis=2)


def _build_spiral_sets(events, projection):
    """Render context and cut-out images for a list of ``[xyzs, label]``.

    Returns ``(features, targets, features_broken, targets_broken)``. The
    context image (128x128) shows every point, with the charge of points
    inside the window -10 < x < 127.5, -117.5 < z < 20 set to 0. The broken
    image (32x32) shows only the points inside that window, with their
    original charge.
    """
    features = np.empty((len(events), 128, 128, 3), dtype=np.uint8)
    targets = np.empty((len(events), ), dtype=np.uint8)
    features_broken = np.empty((len(events), 32, 32, 3), dtype=np.uint8)
    targets_broken = np.empty((len(events), ), dtype=np.uint8)

    # Column 2 (zy) or column 0 (xy) goes on the horizontal axis.
    horizontal = 2 if projection == 'zy' else 0
    for i, (points, label) in enumerate(events):
        x = points[:, horizontal]
        z = points[:, 1]
        # Work on a copy so zeroing the charge does not touch the event data.
        c = points[:, 3].copy()

        # Deleting part of the spiral: remember the cut-out points, then blank
        # their charge in the context image.
        inside = (x > -10) & (x < 127.5) & (z > -117.5) & (z < 20)
        x_b = x[inside]
        z_b = z[inside]
        c_b = c[inside]
        c[inside] = 0

        features[i] = _render_spiral_scatter(
            projection, x, z, c, 128, (-275.0, 275.0), (-275.0, 275.0))
        targets[i] = label

        features_broken[i] = _render_spiral_scatter(
            projection, x_b, z_b, c_b, 32, (-10, 127.5), (-117.5, 20))
        # The original wrote the training labels into the wrong array here,
        # leaving train_targets_broken uninitialised.
        targets_broken[i] = label

    return features, targets, features_broken, targets_broken


def simulated(projection, noise, data_dir, save_path, prefix):
    """Build context and cut-out image sets from simulated events.

    Reads ``<prefix>proton.h5`` (label 0) and ``<prefix>carbon.h5`` (label 1)
    from ``data_dir`` with pad numbers included, applies ``_l`` to the charge
    column, shuffles, splits 80/20, divides charge by the maximum of the
    training part, renders the images and writes everything to
    ``<save_path>/<prefix>images.h5``.

    Parameters
    ----------
    projection : 'zy' or 'xy'; selects which column is plotted horizontally
    noise      : accepted for interface compatibility; the noise step is
                 disabled in this variant, so the value is ignored
    data_dir   : directory containing the simulated HDF5 files
    save_path  : output directory (created if missing)
    prefix     : filename prefix shared by the input and output files

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print("We are in the simulared() function!")
    print('Processing data...')

    labelled = []

    # label 0 = proton, label 1 = carbon
    for label, (species, tag) in enumerate((('proton', 'Proton'),
                                            ('carbon', 'Carbon'))):
        path = os.path.join(data_dir, prefix + species + '.h5')
        with pytpc.HDFDataFile(path, 'r') as events:
            for i, event in enumerate(events):
                xyzs = event.xyzs(peaks_only=True,
                                  drift_vel=5.2,
                                  clock=12.5,
                                  return_pads=True,
                                  baseline_correction=False,
                                  cg_times=False)
                labelled.append([xyzs, label])

                if i % 50 == 0:
                    print(tag + ' event ' + str(i) + ' added.')

    # Take the log of charge data
    log = np.vectorize(_l)
    for event in labelled:
        event[0][:, 3] = log(event[0][:, 3])

    # Split into train and test sets
    labelled = shuffle(labelled)
    partition = int(len(labelled) * 0.8)
    train = labelled[:partition]
    test = labelled[partition:]

    # Normalize: the scale comes from the training events only and is then
    # applied to both train and test.
    max_charge = np.array([event[0][:, 3].max() for event in train]).max()
    for event in labelled:
        event[0][:, 3] = event[0][:, 3] / max_charge

    (train_features, train_targets,
     train_features_broken, train_targets_broken) = _build_spiral_sets(
         train, projection)
    (test_features, test_targets,
     test_features_broken, test_targets_broken) = _build_spiral_sets(
         test, projection)

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    filename = os.path.join(save_path, prefix + 'images.h5')

    # Save to HDF5; the context manager closes the file even on failure.
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('train_features', data=train_features)
        h5.create_dataset('train_targets', data=train_targets)
        h5.create_dataset('test_features', data=test_features)
        h5.create_dataset('test_targets', data=test_targets)
        h5.create_dataset('max_charge', data=np.array([max_charge]))

        h5.create_dataset('train_features_broken', data=train_features_broken)
        h5.create_dataset('train_targets_broken', data=train_targets_broken)
        h5.create_dataset('test_features_broken', data=test_features_broken)
        h5.create_dataset('test_targets_broken', data=test_targets_broken)
Пример #8
0
def real_unlabeled_events(projection, save_path, prefix):
    """Render events of ``corrected_run_0210.h5`` using re-derived charges.

    For every event the raw per-pad traces are read from the ``get/<n>``
    dataset (n = 1-based event position), a pad is kept when any sliding
    window over its trace rises more than ``THRESHOLD`` above the mean of the
    two window ends, and the pad's peak time and charge
    (trace maximum minus ``_average(trace)``) are merged with the x/y position
    reported by pytpc. The charge column is then passed through ``_l``,
    divided by its global maximum, and one image per event is written to
    ``<save_path>/<prefix>images.h5`` (dataset ``images``).

    Parameters
    ----------
    projection : 'zy' or 'xy'; 'zy' plots peak time horizontally, 'xy' plots x
    save_path  : output directory (created if missing)
    prefix     : filename prefix of the output file

    Raises
    ------
    ValueError if ``projection`` is neither 'zy' nor 'xy'. This is checked
    before any file is opened.
    """
    if projection not in ('zy', 'xy'):
        raise ValueError('Invalid projection value.')

    print('Processing data...')
    event_points = []

    # Distance in time buckets between the window centre and each window end.
    half_window = 25

    # Open both views of the run file once, not once per event.
    with pytpc.HDFDataFile('corrected_run_0210.h5', 'r') as events, \
            h5py.File('corrected_run_0210.h5', 'r') as raw:
        for evt_index, event in enumerate(events):
            # Get pytpc xyzs
            xyzs = event.xyzs(peaks_only=True,
                              return_pads=True,
                              baseline_correction=False,
                              cg_times=False)

            # Get the event's trace for each pad, indexed by pad number.
            dset = raw['get/' + str(evt_index + 1)]
            # works better when you exclude start and end
            traces = dset[:, 10:510]
            pad_numbers = dset[:, 4]
            event_trace = np.ndarray(NUMBEROFPADS, dtype=object)
            for row in range(NUMBEROFPADS):
                event_trace[pad_numbers[row]] = np.array(traces[row],
                                                         dtype=float)

            # Collect [time, charge, padnumber] for every pad with a peak.
            hit_peaks = []
            for pad in range(NUMBEROFPADS):
                trace = event_trace[pad]
                charge = np.amax(trace) - _average(trace)
                peak_time = np.argmax(trace)
                # Peak height of every window position at once: centre value
                # minus the mean of the values half_window to either side.
                centre = trace[half_window:-half_window]
                ends_mean = (trace[:-2 * half_window] +
                             trace[2 * half_window:]) * 0.5
                if np.any(centre - ends_mean > THRESHOLD):
                    hit_peaks.append([peak_time, charge, pad])

            # Merge pytpc xyzs array with correct charge values. Column 4 of
            # xyzs is the pad number; later rows win on duplicates, as in the
            # original scan.
            row_for_pad = {}
            for row in xyzs:
                row_for_pad[float(row[4])] = row

            plot_points = np.zeros(shape=(len(hit_peaks), 4))
            for k, (peak_time, charge, pad) in enumerate(hit_peaks):
                match = row_for_pad.get(pad)
                # Pads without a pytpc position keep an all-zero row.
                if match is not None:
                    plot_points[k][0] = match[0]  # x
                    plot_points[k][1] = match[1]  # y
                    plot_points[k][2] = peak_time  # time
                    plot_points[k][3] = charge  # charge

            event_points.append(plot_points)

    # Take the log of charge data
    log = np.vectorize(_l)
    for points in event_points:
        points[:, 3] = log(points[:, 3])

    # Normalize by the largest charge over all events.
    max_charge = np.array(
        [points[:, 3].max() for points in event_points]).max()
    for points in event_points:
        points[:, 3] = points[:, 3] / max_charge

    print('Making images...')

    # Make numpy set
    images = np.empty((len(event_points), 128, 128, 3), dtype=np.uint8)

    # Column 2 is actually z (time) in the 'zy' projection.
    horizontal = 2 if projection == 'zy' else 0
    for i, points in enumerate(event_points):
        fig = plt.figure(figsize=(1, 1), dpi=128)
        try:
            if projection == 'zy':
                plt.xlim(0.0, 512)
            else:
                plt.xlim(-275.0, 275.0)
            plt.ylim((-275.0, 275.0))
            plt.axis('off')
            # cmap='gray_r': the _r inverts greyscale so high charge is black
            # while low charge is white
            plt.scatter(points[:, horizontal], points[:, 1], s=0.6,
                        c=points[:, 3], cmap='gray_r')
            fig.canvas.draw()
            # NOTE(review): private renderer attribute, kept from the original.
            rgba = np.array(fig.canvas.renderer._renderer, dtype=np.uint8)
        finally:
            plt.close(fig)
        # Drop the alpha channel.
        images[i] = np.delete(rgba, 3, axis=2)

    print('Saving file...')

    if not os.path.exists(save_path):
        os.makedirs(save_path)

    filename = os.path.join(save_path, prefix + 'images.h5')

    # Save to HDF5; max_charge is not stored by this variant.
    with h5py.File(filename, 'w') as h5:
        h5.create_dataset('images', data=images)
Пример #9
0
def simulated(data_dir, save_dir, prefix, noise):
    """Builds a shuffled train/test set from simulated events and saves it.

    Every event in ``<prefix>proton.h5`` (target 0) and ``<prefix>carbon.h5``
    (target 1) is discretized with ``dd.discretize_grid_charge``. One junk
    event (target 2) is generated per proton event by running
    ``dd.add_noise`` on a single seed row. The labelled events are shuffled
    and split 80/20 into train and test sets.

    Parameters
    ----------
    data_dir : directory containing the two input HDF5 files
    save_dir : directory the output files are written to; created if it
               does not exist yet
    prefix   : string prepended to the input and output file names
    noise    : if truthy, ``dd.add_noise`` is applied to each proton and
               carbon event before discretization

    Returns
    -------
    None. Writes ``<prefix>train-features.npz`` and
    ``<prefix>test-features.npz`` (sparse CSR matrices) and
    ``<prefix>targets.h5`` (datasets ``train_targets`` and ``test_targets``)
    into ``save_dir``.
    """
    print('Starting...')

    # Keyword arguments shared by every xyzs() extraction below.
    xyz_kwargs = dict(peaks_only=True,
                      drift_vel=5.2,
                      clock=12.5,
                      return_pads=False,
                      baseline_correction=False,
                      cg_times=False)

    proton_path = os.path.join(data_dir, prefix + 'proton.h5')
    carbon_path = os.path.join(data_dir, prefix + 'carbon.h5')

    # Each entry is [discretized event, integer target].
    data = []

    # Context managers guarantee both input files are closed, even when
    # discretization raises part-way through.
    with pytpc.HDFDataFile(proton_path, 'r') as proton_events, \
            pytpc.HDFDataFile(carbon_path, 'r') as carbon_events:
        # Read the count while the file is still open; it sizes the junk set.
        n_junk = len(proton_events)

        sources = (('Proton', proton_events, 0),
                   ('Carbon', carbon_events, 1))

        for name, events, target in sources:
            for i, event in enumerate(events):
                xyzs = event.xyzs(**xyz_kwargs)

                if noise:
                    xyzs = dd.add_noise(xyzs).astype('float32')

                data.append(
                    [dd.discretize_grid_charge(xyzs, X_DISC, Y_DISC, Z_DISC),
                     target])

                if i % 50 == 0:
                    print(name + ' event ' + str(i) + ' added.')

    # Create junk events
    for i in range(n_junk):
        # NOTE(review): np.empty leaves this row uninitialized, so the seed
        # point handed to add_noise holds arbitrary values - confirm whether
        # np.zeros([1, 4]) was intended.
        xyzs = np.empty([1, 4])
        xyzs = dd.add_noise(xyzs).astype('float32')
        data.append(
            [dd.discretize_grid_charge(xyzs, X_DISC, Y_DISC, Z_DISC), 2])

        if i % 50 == 0:
            print('Junk event ' + str(i) + ' added.')

    # Split into train and test sets
    data = shuffle(data)
    partition = int(len(data) * 0.8)
    train = data[:partition]
    test = data[partition:]

    train_features = sp.sparse.vstack([t[0] for t in train], format='csr')
    train_targets = [t[1] for t in train]
    test_features = sp.sparse.vstack([t[0] for t in test], format='csr')
    test_targets = [t[1] for t in test]

    # Make sure the destination exists before writing anything.
    if save_dir and not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Save
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}train-features.npz'.format(prefix)),
        train_features)
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}test-features.npz'.format(prefix)),
        test_features)

    targets_path = os.path.join(save_dir, '{}targets.h5'.format(prefix))
    with h5py.File(targets_path, 'w') as h5:
        h5.create_dataset('train_targets', data=train_targets)
        h5.create_dataset('test_targets', data=test_targets)
Пример #10
0
def real(data_dir, save_dir, prefix):
    """Builds a shuffled train/test set from labelled real runs and saves it.

    For every run in ``RUNS`` the events listed in ``run_<run>_labels.csv``
    are read from ``run_<run>.h5`` and discretized with
    ``dd.discretize_grid_charge``. The CSV ``label`` column selects the
    target: ``'p'`` -> 0, ``'c'`` -> 1, ``'j'`` -> 2. The labelled events are
    shuffled and split 80/20 into train and test sets.

    Parameters
    ----------
    data_dir : directory containing the run HDF5 files and label CSV files
    save_dir : directory the output files are written to; created if it
               does not exist yet
    prefix   : string prepended to the output file names

    Returns
    -------
    None. Writes ``<prefix>train-features.npz`` and
    ``<prefix>test-features.npz`` (sparse CSR matrices) and
    ``<prefix>targets.h5`` (datasets ``train_targets`` and ``test_targets``)
    into ``save_dir``.
    """
    # Keyword arguments shared by every xyzs() extraction below.
    xyz_kwargs = dict(peaks_only=True,
                      drift_vel=5.2,
                      clock=12.5,
                      return_pads=False,
                      baseline_correction=False,
                      cg_times=False)

    # CSV label -> integer target, in the order events are appended per run.
    label_targets = (('p', 0), ('c', 1), ('j', 2))

    # Each entry is [discretized event, integer target].
    data = []

    for run in RUNS:
        run_filename = os.path.join(data_dir, 'run_{}.h5'.format(run))
        labels_filename = os.path.join(data_dir,
                                       'run_{}_labels.csv'.format(run))

        # The context manager closes each run file before the next one is
        # opened instead of leaking one handle per run.
        with pytpc.HDFDataFile(run_filename, 'r') as events:
            labels = pd.read_csv(labels_filename, sep=',')

            for label, target in label_targets:
                evt_ids = labels.loc[
                    (labels['label'] == label)]['evt_id'].values

                for evt_id in evt_ids:
                    # Events are looked up by the string form of their id.
                    event = events[str(evt_id)]
                    xyzs = event.xyzs(**xyz_kwargs)

                    data.append(
                        [dd.discretize_grid_charge(xyzs, X_DISC, Y_DISC,
                                                   Z_DISC),
                         target])

    # Split into train and test sets
    data = shuffle(data)
    partition = int(len(data) * 0.8)
    train = data[:partition]
    test = data[partition:]

    train_features = sp.sparse.vstack([t[0] for t in train], format='csr')
    train_targets = [t[1] for t in train]
    test_features = sp.sparse.vstack([t[0] for t in test], format='csr')
    test_targets = [t[1] for t in test]

    # Make sure the destination exists before writing anything.
    if save_dir and not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Save
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}train-features.npz'.format(prefix)),
        train_features)
    sp.sparse.save_npz(
        os.path.join(save_dir, '{}test-features.npz'.format(prefix)),
        test_features)

    targets_path = os.path.join(save_dir, '{}targets.h5'.format(prefix))
    with h5py.File(targets_path, 'w') as h5:
        h5.create_dataset('train_targets', data=train_targets)
        h5.create_dataset('test_targets', data=test_targets)
Пример #11
0
# Root of the repository checkout and the folder receiving the converted
# per-event files.
repo = "/home/solli-comphys/github/attpc-classification-reproduction/"
data_repo = repo + "data/datapoints/"

import sys

# Number of output file names handed to the converter per input HDF5 file.
n_samples = 4001

# One output name per event. The numeric part is a float, so the names look
# like "100000.0.npy"; carbon names count up from 1e5, proton names from 2e5.
filenames_c = [
    data_repo + str(1e5 + k) + ".npy" for k in range(int(10 * n_samples))
]

filenames_p = [
    data_repo + str(2e5 + k) + ".npy" for k in range(int(10 * n_samples))
]

# The converter module lives in the repository's "modules" folder.
sys.path.insert(0, repo + "modules")

for i in tqdm(range(0, 10)):
    c_name = repo + "data/C_40000_tilt_largeEvts_{}.h5".format(i)
    p_name = repo + "data/p_40000_tilt_largeEvts_{}.h5".format(i)

    # Slice of output names that belongs to the i-th pair of input files.
    first = i * n_samples
    last = (i + 1) * n_samples

    # Carbon file first, then the proton file, each with its own name list.
    for h5_name, names in ((c_name, filenames_c), (p_name, filenames_p)):
        with pytpc.HDFDataFile(h5_name, "r") as f:
            from representation_converter import TpcRepresent

            convert_obj = TpcRepresent(names[first:last])
            # Materialize every event of the file before converting.
            events = [f[k] for k in range(len(f))]
            convert_obj.convert(events)
Пример #12
0
carbon, and junk.
"""
import matplotlib.pyplot as plt
import scipy as sp
import numpy as np

import pytpc
import sys
sys.path.insert(0, '../modules/')
import dataDiscretization as dd

data_path = '../data/tilt/'
plot_path = '../cnn-plots/'

#proton events
with pytpc.HDFDataFile(data_path + 'p_40000_tilt_largeEvts.h5', 'r') as p_sim:
    n_evts = len(p_sim)
    evt_id = 0

    while (evt_id < n_evts):
        curEvt = p_sim[evt_id]
        curxyz = curEvt.xyzs(peaks_only=True,
                             drift_vel=5.2,
                             clock=12.5,
                             return_pads=False,
                             baseline_correction=False,
                             cg_times=False)
        noise_xyz = dd.addNoise(curxyz)

        plt.figure(figsize=(1.28, 1.28), dpi=100)
        plt.plot(noise_xyz[:, 2],
from pytpc.hdfdata import HDFDataFile
import pytpc

from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
import matplotlib.pyplot as plt

# Root of the repository checkout that holds the simulated data.
repo = "/home/solli-comphys/github/attpc-classification-reproduction/"

# Placeholder; rebound to the selected event inside the with-block below.
evt = 0

# Pull a single event (index 3) out of the carbon simulation file.
with pytpc.HDFDataFile(repo + "/data/C_40000_tilt_largeEvts.h5", "r") as f:
    evt = f[3]

# Options passed to xyzs() when extracting the event's point data.
xyz_options = {
    "peaks_only": True,
    "drift_vel": 5.2,
    "clock": 12.5,
    "return_pads": False,
    "baseline_correction": False,
    "cg_times": False,
}
space_distr = evt.xyzs(**xyz_options)

# Columns 0-3 of the result are used as x, y, z and charge respectively.
xs, ys, zs, ch = (space_distr[:, col] for col in range(4))

import pandas as pd

# Charge column wrapped in a DataFrame.
charge_series = pd.DataFrame(ch)

# Empty figure with one 3D axes.
fig = plt.figure()
ax = fig.add_subplot(111, projection="3d")