Пример #1
0
def main(args):
    """Plot the soft drop multiplicity for a set of labelled sample files.

    Reads the following attributes from ``args``:

    * ``data``: optional reference jet file; when set, reference Lund images
      and reference soft drop multiplicities are computed from it.
    * ``nev``, ``npx``, ``yaxis``: forwarded to ``Jets`` / ``LundImage``.
    * ``output``: optional output folder for the figure.
    * ``label_data_pairs``: flat sequence ``[label0, file0, label1, file1, ...]``.

    Raises:
        ValueError: if ``label_data_pairs`` has an odd number of entries.
    """
    zcut = 0.007
    beta = -1
    thetacut = 0.0009

    # Bind the reference inputs up front so the plot call below never hits an
    # unbound name when no reference data file is given.
    # NOTE(review): assumes plot_sdmult accepts None for both - confirm.
    imgs_ref = None
    nsd_ref = None
    if args.data:
        sdmult = SoftDropMult(zcut=zcut, beta=beta, thetacut=thetacut)
        events = Jets(args.data, args.nev).values()
        imgs_ref = np.zeros((len(events), args.npx, args.npx))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        nsd_ref = []
        for i, jet in enumerate(events):
            tree = JetTree(jet)
            nsd_ref.append(sdmult(tree))
            imgs_ref[i] = li_gen(tree)

    # Only trailing slashes are removed: stripping leading ones would turn an
    # absolute output path into a relative one.
    folder = args.output.rstrip('/') + '/' if args.output else ''

    pairs = args.label_data_pairs
    if len(pairs) % 2 != 0:
        # Explicit check rather than assert, which is removed under `python -O`.
        raise ValueError(
            'label_data_pairs must hold an even number of entries '
            '(label, file, label, file, ...)')
    # Even positions are labels, odd positions the matching data files.
    filedic = dict(zip(pairs[0::2], pairs[1::2]))

    print('Plotting soft drop multiplicity')
    plot_sdmult(filedic,
                imgs_ref,
                folder + 'softdropmult.pdf',
                nsd_ref,
                npx=args.npx,
                zcut=zcut,
                beta=beta,
                thetacut=thetacut)
Пример #2
0
def main(args):
    """Plot activation and Lund-plane slices for a set of labelled sample files.

    Reads the following attributes from ``args``:

    * ``data``: optional reference jet file; when set, the slice plots are
      drawn against the averaged reference image, otherwise the "noratio"
      variants are used.
    * ``nev``, ``npx``, ``yaxis``: forwarded to ``Jets`` / ``LundImage``.
    * ``output``: optional output folder for the figures.
    * ``label_data_pairs``: flat sequence ``[label0, file0, label1, file1, ...]``.

    Raises:
        ValueError: if ``label_data_pairs`` has an odd number of entries.
    """
    imgs_ref = None
    imgref = None
    if args.data:
        events = Jets(args.data, args.nev).values()
        imgs_ref = np.zeros((len(events), args.npx, args.npx))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        for i, jet in enumerate(events):
            imgs_ref[i] = li_gen(JetTree(jet))
        # Average over the event axis to get the reference image.
        imgref = np.average(imgs_ref, axis=0)

    # Only trailing slashes are removed: stripping leading ones would turn an
    # absolute output path into a relative one.
    folder = args.output.rstrip('/') + '/' if args.output else ''

    pairs = args.label_data_pairs
    if len(pairs) % 2 != 0:
        # Explicit check rather than assert, which is removed under `python -O`.
        raise ValueError(
            'label_data_pairs must hold an even number of entries '
            '(label, file, label, file, ...)')
    # Even positions are labels, odd positions the matching data files.
    filedic = dict(zip(pairs[0::2], pairs[1::2]))

    print('Plotting slices')
    if imgref is not None:
        plot_activation(filedic, imgs_ref, folder + 'activation.pdf')
        plot_slice_delta(filedic, imgref, folder + 'delta_slice.pdf', args.npx)
        plot_slice_kt(filedic, imgref, folder + 'kt_slice.pdf', args.npx)
    else:
        plot_slice_delta_noratio(filedic, folder + 'delta_slice.pdf', args.npx)
        plot_slice_kt_noratio(filedic, folder + 'kt_slice.pdf', args.npx)
Пример #3
0
    def load_data(self, hps):
        """Load both jet samples as Lund images, batch-average and preprocess them.

        Keys read from ``hps``: 'labelA', 'labelB', 'fnA', 'fnB', 'nev',
        'navg', 'zca' and 'scaler'.

        Sets ``self.dataset_name``, ``self.lund``, ``self.avg``,
        ``self.preproc`` and the preprocessed ``self.imagesA`` /
        ``self.imagesB``.
        """
        self.dataset_name = '%s2%s' % (hps['labelA'], hps['labelB'])
        self.lund = LundImage(npxlx=self.img_rows, npxly=self.img_cols)

        def lund_images(filename):
            # One Lund image per jet, with a trailing axis of length one.
            return [self.lund(JetTree(jet))[:, :, np.newaxis]
                    for jet in Jets(filename, hps['nev']).values()]

        self.imagesA = lund_images(hps['fnA'])
        self.imagesB = lund_images(hps['fnB'])

        # batch averaging of both samples
        self.avg = Averager(hps['navg'])
        self.imagesA = self.avg.transform(np.array(self.imagesA))
        self.imagesB = self.avg.transform(np.array(self.imagesB))

        # pick the preprocessor; both variants take the same options
        preproc_cls = PreprocessorZCA if hps['zca'] else Preprocessor
        self.preproc = preproc_cls(scaler=hps['scaler'], flatten=False,
                                   remove_zero=False, pxl_by_pxl=False)

        # fit on the union of both samples, then transform each separately
        stacked = np.concatenate((self.imagesA, self.imagesB))
        self.preproc.fit(stacked)
        self.imagesA = self.preproc.transform(self.imagesA)
        self.imagesB = self.preproc.transform(self.imagesB)
Пример #4
0
def main(args):
    """Round-trip images through a preprocessor and show inputs next to outputs.

    The images come either from MNIST (``args.mnist``) or from Lund images
    built from the jets in ``args.data``. A preprocessor is chosen from
    ``args.pca`` / ``args.autoencoder`` / ``args.zca`` (plain ``Preprocessor``
    otherwise), fitted, applied and inverted. A random selection of
    ``2 * r`` input/decoded images is then displayed and a loss printed.
    """
    rem0 = not args.keepzero
    scaler = not args.noscaler
    flatten = not args.noflat

    # read in the data set
    if args.mnist:
        # for debugging purposes, we have the option of loading in the
        # mnist data and training the model on this.
        (img_train, _), (_, _) = mnist.load_data()
        # Divide by 255 to rescale the pixel values
        img_train = img_train.astype('float32') / 255
        img_train = np.expand_dims(img_train, axis=3)
    else:
        # load in the jets from file, and create an array of lund images
        events = Jets(args.data, args.nev).values()
        img_train = np.zeros((len(events), args.npx, args.npx, 1))
        li_gen = LundImage(npxlx=args.npx, y_axis=args.yaxis)
        for i, jet in enumerate(events):
            img_train[i] = li_gen(JetTree(jet)).reshape(args.npx, args.npx, 1)

    # set up the preprocessing pipeline
    if args.pca:
        preprocess = PreprocessorPCA(args.pca, whiten=True, scaler=scaler,
                                     flatten=flatten, remove_zero=rem0)
    elif args.autoencoder:
        preprocess = PreprocessorAE(args.autoencoder, args.epochs, scaler=scaler,
                                    flatten=flatten, remove_zero=rem0)
    elif args.zca:
        preprocess = PreprocessorZCA(scaler=scaler, flatten=flatten,
                                     remove_zero=rem0)
    else:
        preprocess = Preprocessor(scaler=scaler, flatten=flatten,
                                  remove_zero=rem0)
    # fit the preprocessing unit
    preprocess.fit(img_train)
    # transform the images
    # NB: for ZCA, the zca factor is set during this step
    img_transf = preprocess.transform(img_train)
    print('Shape after preprocessing:', img_transf.shape)

    # now transform back to images
    img_transf = preprocess.inverse(img_transf)

    r = 5
    # indices of the 2*r images shown in the figure (drawn with replacement)
    selec = np.random.choice(img_transf.shape[0], 2 * r, replace=True)
    if not args.mnist:
        # Interpret the probabilistic sample as physical images: each pixel is
        # set to 0 when its value is below an independent uniform draw on
        # [0, 1) and to 1 otherwise. Done in one vectorised step instead of a
        # Python loop over every pixel.
        thresholds = np.random.uniform(0, 1, size=img_transf.shape)
        img_transf = np.where(img_transf < thresholds, 0.0, 1.0)\
            .astype(img_transf.dtype)
    ref_transf = img_transf.reshape(img_transf.shape[0],
                                    args.npx, args.npx)[selec, :]
    ref_train = img_train.reshape(img_train.shape[0],
                                  args.npx, args.npx)[selec, :]

    loss = np.linalg.norm(np.average(ref_train - ref_transf, axis=0))
    print('# loss: ', loss)

    # columns 0-1 show inputs, columns 2-3 the matching decoded images
    fig, axs = plt.subplots(r, 4)
    axs[0, 0].title.set_text('Input')
    axs[0, 3].title.set_text('Decoded')
    for i in range(r):
        axs[i, 0].imshow(ref_train[i, :, :], cmap='gray')
        axs[i, 0].axis('off')
        axs[i, 1].imshow(ref_train[r + i, :, :], cmap='gray')
        axs[i, 1].axis('off')
        axs[i, 2].imshow(ref_transf[i, :, :], cmap='gray')
        axs[i, 2].axis('off')
        # second half of the selection: use r rather than a hard-coded 5 so
        # this column stays aligned with column 1 if r is changed
        axs[i, 3].imshow(ref_transf[r + i, :, :], cmap='gray')
        axs[i, 3].axis('off')

    # vertical separator in figure coordinates between inputs and outputs
    plt.plot([0.5, 0.5], [0, 1], color='lightgray', lw=5,
             transform=plt.gcf().transFigure, clip_on=False)
    plt.show()
    plt.close()
Пример #5
0
def plot_events(gen_sample, avg, preproc, datafile, setup, folder):
    """Write a PDF showing one event at each stage of the processing chain.

    Up to 5000 jets are requested from ``datafile`` and converted to Lund
    images. The first image of each of the following arrays is drawn on its
    own page of ``{folder}/plot_events.pdf``: raw input, averaged input,
    preprocessed input, raw generated output, generated output after
    ``preproc.inverse`` and generated output after ``avg.inverse``.

    Args:
        gen_sample: generated sample in the preprocessed representation.
        avg: object providing ``transform`` and ``inverse``.
        preproc: object providing ``transform``, ``inverse`` and ``unmask``.
        datafile: jet file passed to ``Jets``.
        setup: mapping with key 'npx' and optionally 'y_axis' (default 'kt').
        folder: output directory.
    """
    npx = setup['npx']

    # load in the data
    events = Jets(datafile, 5000).values()
    img_data = np.zeros((len(events), npx, npx, 1))
    li_gen = LundImage(npxlx=npx,
                       y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
    for i, jet in enumerate(events):
        img_data[i] = li_gen(JetTree(jet)).reshape(npx, npx, 1)

    # now reformat the training set as its average over n elements
    img_input = avg.transform(img_data)

    # set up the preprocessed input
    img_unmask = preproc.unmask(preproc.transform(img_input))
    # set up the generated images
    gen_unmask = preproc.unmask(gen_sample)
    gen_processed = preproc.inverse(gen_sample)
    gen_final = avg.inverse(gen_processed)

    # one page per entry, in this order
    pages = (
        ('Raw input', img_data),
        ('Averaged input', img_input),
        ('Preprocessed input', img_unmask),
        ('Raw generated output', gen_unmask),
        ('Processed generated sample', gen_processed),
        ('Generated sample', gen_final),
    )
    cbartics = [-1.0, -0.5, 0.0, 0.5, 1.0]
    extent = [LundImage.xval[0], LundImage.xval[1],
              LundImage.yval[0], LundImage.yval[1]]

    with PdfPages(f'{folder}/plot_events.pdf') as pdf:
        for title, images in pages:
            plt.figure(figsize=(4.5, 4))
            plt.title(title)
            plt.imshow(images[0].reshape(npx, npx).transpose(),
                       vmin=-1.0, vmax=1.0, cmap=cm.seismic, origin='lower',
                       aspect='auto', extent=extent)
            plt.colorbar(orientation='vertical', label=r'$\rho$',
                         ticks=cbartics)
            # raw strings: '\l', '\D' and '\m' are not valid escape sequences
            plt.xlabel(r'$\ln(1 / \Delta_{ab})$')
            plt.ylabel(r'$\ln(k_{t} / \mathrm{GeV})$')
            pdf.savefig(bbox_inches='tight')
            plt.close()
Пример #6
0
def plot_events_debug(gen_sample, preproc, datafile, setup, folder):
    """Save a 5x12 grid comparing input and generated images for debugging.

    Each row shows four groups of three images: batch-averaged input,
    preprocessed input, raw generated sample and generated sample after
    ``preproc.inverse``. The figure is written to ``{folder}/plot_debug.pdf``.

    Args:
        gen_sample: generated sample in the preprocessed representation.
        preproc: object providing ``transform``, ``inverse`` and ``unmask``.
        datafile: 'mnist' to load MNIST through keras, otherwise a jet file
            passed to ``Jets``.
        setup: mapping with keys 'npx', 'navg' and optionally 'y_axis'
            (default 'kt').
        folder: output directory.
    """
    npx = setup['npx']

    # load in the data
    if datafile == 'mnist':
        # if mnist data, load the images from keras
        from keras.datasets import mnist
        (img_data, _), (_, _) = mnist.load_data()
        # Rescale -1 to 1
        img_data = (img_data.astype(np.float32) - 127.5) / 127.5
        img_data = np.expand_dims(img_data, axis=3)
    else:
        events = Jets(datafile, 5000).values()
        img_data = np.zeros((len(events), npx, npx, 1))
        li_gen = LundImage(npxlx=npx,
                           y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
        for i, jet in enumerate(events):
            img_data[i] = li_gen(JetTree(jet)).reshape(npx, npx, 1)

    # now reformat the training set as its average over n elements:
    # every entry is the mean of 'navg' distinct randomly chosen images
    n_img = len(img_data)
    img_input = np.zeros((n_img, npx, npx, 1))
    for i in range(n_img):
        picks = np.random.choice(img_data.shape[0], setup['navg'],
                                 replace=False)
        img_input[i] = np.average(img_data[picks, :], axis=0)

    # set up the preprocessed input
    img_unmask = preproc.unmask(preproc.transform(img_input))
    # set up the generated images
    gen_unmask = preproc.unmask(gen_sample)
    gen_final = preproc.inverse(gen_sample)

    # one entry per group of three columns: (title, images, imshow options)
    panels = (
        ('Input image', img_input, dict(vmin=0.0, vmax=0.5)),
        ('Preprocessed input', img_unmask,
         dict(vmin=-1.0, vmax=1, cmap=cm.seismic)),
        ('Raw generated sample', gen_unmask,
         dict(vmin=-1.0, vmax=1, cmap=cm.seismic)),
        ('Final generated sample', gen_final, dict(vmin=0.0, vmax=0.5)),
    )

    fig, ax = plt.subplots(figsize=(15, 6), nrows=5, ncols=12)
    for row_idx, row in enumerate(ax):
        for col_idx, cell in enumerate(row):
            cell.axis('off')
            title, images, style = panels[col_idx // 3]
            # the title goes above the middle column of each group, top row only
            if row_idx == 0 and col_idx % 3 == 1:
                cell.set_title(title)
            # three consecutive images per row within each group
            img = images[col_idx % 3 + 3 * row_idx]
            cell.imshow(img.reshape(npx, npx).transpose(),
                        origin='lower', **style)
    plt.savefig(f'{folder}/plot_debug.pdf')
    # release the figure once written so repeated calls do not accumulate them
    plt.close(fig)
Пример #7
0
def plot_lund_with_ref(filename, reference, figname, y_axis='kt'):
    """Plot a sample of Lund images and the average density along with reference data.

    Pages written to ``figname``, in order: a 5x5 grid of randomly chosen
    generated images, a 5x5 grid of randomly chosen reference images, the
    average generated image, the average reference image, their ratio, and a
    histogram of the per-image pixel sum for both sets.

    Args:
        filename: ``.npy`` file with the generated images, loaded by ``np.load``.
        reference: 'mnist' to use MNIST from keras, otherwise a jet file
            passed to ``Jets``.
        figname: path of the output PDF.
        y_axis: forwarded to ``LundImage``.
    """
    r, c = 5, 5
    imgs = np.load(filename)
    sample = imgs[np.random.choice(imgs.shape[0], r * c, replace=False), :]

    if reference == 'mnist':
        # if mnist data, load the images from keras
        from keras.datasets import mnist
        (imgs_ref, _), (_, _) = mnist.load_data()
        # Rescale -1 to 1
        imgs_ref = (imgs_ref.astype(np.float32) - 127.5) / 127.5
    else:
        # now read in the pythia reference sample
        events = Jets(reference, imgs.shape[0]).values()
        imgs_ref = np.zeros((len(events), imgs.shape[1], imgs.shape[2]))
        li_gen = LundImage(npxlx=imgs.shape[1], npxly=imgs.shape[2],
                           y_axis=y_axis)
        for i, jet in enumerate(events):
            imgs_ref[i] = li_gen(JetTree(jet)).reshape(imgs.shape[1],
                                                       imgs.shape[2])
    sample_ref = imgs_ref[np.random.choice(imgs_ref.shape[0], r * c,
                                           replace=False), :]

    avg_gen = np.average(imgs, axis=0).transpose()
    avg_ref = np.average(imgs_ref, axis=0).transpose()
    extent = [LundImage.xval[0], LundImage.xval[1],
              LundImage.yval[0], LundImage.yval[1]]
    # raw strings: '\l', '\D' and '\m' are not valid escape sequences
    xlabel = r'$\ln(1 / \Delta_{ab})$'
    ylabel = r'$\ln(k_{t} / \mathrm{GeV})$'

    with PdfPages(figname) as pdf:
        # grids of individual images; these draw the random samples selected
        # above instead of always showing the first r*c images of each set
        for title, images in (('generated', sample),
                              ('reference', sample_ref)):
            fig, axs = plt.subplots(r, c)
            plt.suptitle(title)
            for cnt, axis in enumerate(axs.flat):
                axis.imshow(images[cnt].transpose(), origin='lower',
                            cmap='gray', vmin=0.0, vmax=1.0)
                axis.axis('off')
            pdf.savefig()
            plt.close()

        # average density of each set
        cbartics = [0, 0.05, 0.1, 0.15, 0.2, 0.25]
        for title, density in (('generated', avg_gen),
                               ('reference', avg_ref)):
            plt.figure(figsize=(6, 6))
            plt.title(title)
            plt.imshow(density, origin='lower', vmin=0.0, vmax=0.2,
                       aspect='auto', extent=extent)
            plt.colorbar(orientation='vertical', label=r'$\rho$',
                         ticks=cbartics)
            plt.xlabel(xlabel)
            plt.ylabel(ylabel)
            pdf.savefig()
            plt.close()

        # ratio of the two average densities
        plt.figure(figsize=(6, 6))
        plt.title('generated/reference')
        plt.imshow(np.divide(avg_gen, avg_ref),
                   origin='lower', vmin=0.5, vmax=1.5, cmap=cm.seismic,
                   aspect='auto', extent=extent)
        plt.colorbar(orientation='vertical')
        plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        pdf.savefig()
        plt.close()

        # distribution of the summed pixel values per image
        fig = plt.figure()
        bins = np.arange(0, 101, 1)
        gen_act = [np.sum(img) for img in imgs]
        ref_act = [np.sum(img) for img in imgs_ref]
        plt.hist(gen_act, bins=bins, density=True, color='C0', alpha=0.3,
                 label='generated')
        plt.hist(ref_act, bins=bins, density=True, color='C1', alpha=0.3,
                 label='reference')
        plt.title('activated pixels')
        plt.xlim((0, 50))
        plt.legend()
        # save first, then close: the figure must still be open when written
        pdf.savefig(fig)
        plt.close(fig)
Пример #8
0
def build_and_train_model(setup):
    """Build, train and evaluate a generative model described by ``setup``.

    Keys read from ``setup``: 'model', 'data', 'nev', 'npx', optional
    'y_axis' (default 'kt'), 'navg', 'epochs', 'batch_size', 'ngen', 'scan'
    and, when 'scan' is truthy, 'monitor_final_loss'. ``setup`` is also
    passed to ``build_preprocessor`` and ``build_model``.

    Returns:
        When ``setup['scan']`` is truthy, a dict with keys 'loss' and
        'status'. Otherwise the tuple
        ``(model, gen_sample, (final_loss, raw_loss), preprocess)``.

    Raises:
        ValueError: if ``setup['model']`` is not one of the supported names.
    """
    print('[+] Training model')
    # Validate before touching the backend session so that an invalid
    # configuration fails without side effects.
    if setup['model'] not in ('gan', 'dcgan', 'wgan', 'wgangp', 'vae', 'aae',
                              'bgan', 'lsgan'):
        raise ValueError('Invalid input: choose one model at a time.')
    K.clear_session()

    # read in the data set
    if setup['data'] == 'mnist':
        print('[+] Loading mnist data')
        from keras.datasets import mnist
        # for debugging purposes, we have the option of loading in the
        # mnist data and training the model on this.
        (img_data, _), (_, _) = mnist.load_data()
        # Compare by value: `is not` on a string literal tests object identity
        # and is not a reliable equality check.
        if setup['model'] != 'vae':
            # Rescale -1 to 1
            img_data = (img_data.astype(np.float32) - 127.5) / 127.5
        else:
            # Rescale 0 to 1
            img_data = img_data.astype('float32') / 255
        img_data = np.expand_dims(img_data, axis=3)
    else:
        # load in the jets from file, and create an array of lund images
        print('[+] Reading jet data from file')
        events = Jets(setup['data'], setup['nev']).values()
        img_data = np.zeros((len(events), setup['npx'], setup['npx'], 1))
        li_gen = LundImage(
            npxlx=setup['npx'],
            y_axis=setup['y_axis'] if 'y_axis' in setup else 'kt')
        for i, jet in enumerate(events):
            img_data[i] = li_gen(JetTree(jet)).reshape(
                setup['npx'], setup['npx'], 1)

    avg = Averager(setup['navg'])
    img_train = avg.transform(img_data)

    # set up a preprocessing pipeline
    preprocess = build_preprocessor(setup)

    # prepare the training data for the model training
    print('[+] Fitting the preprocessor')
    preprocess.fit(img_train)

    # NB: for ZCA, the zca factor is set in the process.transform call
    img_train = preprocess.transform(img_train)

    # now set up the model
    model = build_model(setup, length=(img_train.shape[1]))

    # train on the images
    print('[+] Training the generative model')
    model.train(img_train,
                epochs=setup['epochs'],
                batch_size=setup['batch_size'])

    # now generate a test sample and save it
    gen_sample = model.generate(setup['ngen'])

    # get the raw loss, evaluated on gan input and generated sample;
    # both sides are truncated to the same number of images
    print('[+] Calculating loss on raw training data')
    n_cmp = min(setup['ngen'], len(img_train))
    loss_raw = loss_calc_raw(
        preprocess.unmask(gen_sample[:n_cmp]),
        preprocess.unmask(img_train[:n_cmp]))

    # retransform the generated sample to image space
    gen_sample = preprocess.inverse(gen_sample)
    gen_sample = avg.inverse(gen_sample)

    # get reference sample and generated sample for tests: as many randomly
    # chosen (with replacement) input images as there are generated ones
    ref_sample = img_data.reshape(img_data.shape[0], setup['npx'], setup['npx'])\
        [np.random.choice(img_data.shape[0], len(gen_sample), replace=True), :]

    print('[+] Calculating final loss after inverting preprocessing')
    loss = (loss_calc(gen_sample, ref_sample), loss_raw)
    if setup['scan']:
        res = {
            'loss': loss[0] if setup['monitor_final_loss'] else loss[1],
            'status': STATUS_OK
        }
    else:
        res = model, gen_sample, loss, preprocess
    return res