Example #1
def get_elbow(components, partial_mask):
    masked_eigenimgs = reshape_unmasked_values_to_shots(
        components.astype(np.float64), partial_mask)
    qs = np.linspace(0, 1, 1)
    dc = DiffCorr(masked_eigenimgs, q_values=qs, k=0)
    eigenimg_ac = dc.autocorr()[:, :, 1]

    if np.abs(eigenimg_ac[0]) < 0.5:
        cutoff = 1
    else:
        for ii, aa in enumerate(eigenimg_ac[:, 0]):
            if np.abs(aa) >= 0.5:
                continue
            else:
                cutoff = ii
                break
    return cutoff
Example #2
def get_elbow(components, partial_mask):
    masked_eigenimgs = reshape_unmasked_values_to_shots(
        components.astype(np.float64), partial_mask)
    qs = np.linspace(0, 1, 1)
    dc = DiffCorr(masked_eigenimgs, q_values=qs, k=0)
    eigenimg_ac = dc.autocorr()[:, :, 1]

    if np.abs(eigenimg_ac[0]) < 0.5:
        cutoff = 1
    else:
        for ii, aa in enumerate(eigenimg_ac[:, 0]):
            if np.abs(aa) >= 0.5:
                continue
            else:
                cutoff = ii
                break
    return cutoff
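Both examples above pick the PCA cutoff by scanning the per-component autocorrelation magnitudes and stopping at the first one below 0.5. A simplified standalone sketch of that scan (pure NumPy; the synthetic array stands in for dc.autocorr()[:, :, 1], first_below_threshold is a hypothetical name, and the special case get_elbow applies to the first component is omitted):

import numpy as np

def first_below_threshold(ac_values, threshold=0.5):
    # Mirror the loop in get_elbow: return the index of the first
    # component whose |autocorrelation| drops below the threshold.
    for ii, aa in enumerate(np.abs(ac_values)):
        if aa < threshold:
            return ii
    return len(ac_values)  # every component is still above threshold

ac = np.array([0.9, 0.8, 0.7, 0.4, 0.2])  # made-up eigenimage autocorrelations
print(first_below_threshold(ac))          # -> 3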
Example #3
def pair_diff_PI(norm_shots, mask_corr, phi_offset=0, pair_method='int'):

    if pair_method == 'corr':
        print("doing corr pairing...")
        #dummy qs
        num_phi = norm_shots.shape[-1]
        qs = np.array([1.0])
        dc = DiffCorr(norm_shots, qs, 0, pre_dif=True)
        corr = dc.autocorr()

        corr /= mask_corr
        corr = corr[:, :, phi_offset:num_phi // 2 - phi_offset]

        eps = distance.cdist(corr[:, 0], corr[:, 0], metric='euclidean')

    if pair_method == 'int':
        print "doing intensity pair..."
        eps = distance.cdist(norm_shots[:, 0],
                             norm_shots[:, 0],
                             metric='euclidean')
    # do this so the diagonals are not the minimum, i.e. don't pair shot with itself
    epsI = 1.1 * eps.max(1) * np.identity(eps.shape[0])
    eps += epsI

    shot_preference = np.roll(eps.argsort(1), 1, axis=1)
    pref_dict = {str(E[0]): list(E[1:]) for E in shot_preference.astype(str)}

    print("stable roommate pair....")
    pairs_dict = stable.stableroomate(prefs=pref_dict)

    pairing = np.array(MakeTagPairs._remove_duplicate_pairs(pairs_dict))

    print("computing difference intensities...")
    diff_norm = np.zeros(
        (norm_shots.shape[0] // 2, norm_shots.shape[1], norm_shots.shape[-1]),
        dtype=np.float64)

    for index, pp in enumerate(pairing):
        diff_norm[index] = norm_shots[pp[0]] - norm_shots[pp[1]]

    return diff_norm, pairing
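The comment about the diagonals is the key trick here: adding a large value to the diagonal of the distance matrix prevents a shot from being its own nearest neighbor before the stable-roommate matching runs. A small self-contained illustration of that step (SciPy/NumPy only; the shots are random stand-ins):

import numpy as np
from scipy.spatial import distance

shots = np.random.rand(4, 10)                         # 4 toy 1D shots
eps = distance.cdist(shots, shots, metric='euclidean')
eps += 1.1 * eps.max(1) * np.identity(eps.shape[0])   # inflate the diagonal
print(eps.argmin(1))                                  # nearest *distinct* shot per row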
Example #4
    
        # get back the masked images and components

        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)

        # denoise

        Train_noise = new_Train[:, :num_pca].dot(components[:num_pca])
        denoise_Train = reshape_unmasked_values_to_shots(
            Train - Train_noise - Train.mean(0)[None, :], partial_mask)

        if denoise_Train.shape[0] % 2 > 0:
            denoise_Train = denoise_Train[:-1]
        denoise_Train = denoise_Train[:-1][::2] - denoise_Train[1:][::2]
        dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=True)
        Train_difcor = dc.autocorr().mean(0)[0]

        all_corrs.append(Train_difcor)
        all_nums.append(Train.shape[0])

        f_out.create_dataset('q%d/dif_cor%d' % (qidx, n_chunk),
                             data=Train_difcor)


    all_corrs = np.array(all_corrs)
    all_nums = np.array(all_nums)
    # print all_corrs.shape
    # print all_nums

    ave_corr = np.sum(all_corrs * (all_nums / float(all_nums.sum()))[:, None],
                      axis=0)
    # print ave_corr.shape
Example #5
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_pro_shots * 0.1)  # use 10% of the protein shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()
    if args.num_pca is None:
        num_pca = int(num_pca_components[qidx])
        max_pca = num_pca + 5
    else:
        num_pca = args.num_pca + 1
        max_pca = args.num_pca + 1

    print('denoising with PCA critical num_pca_components = %d...' % num_pca)
    if 'pca_components' not in f_out[q_group].keys():
        # if there are no saved PCA components, run PCA and save the components
        pca = PCA(n_components=50, whiten=False)

        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)
        if 'explained_variance_ratio' not in f_out[q_group].keys():
Example #6
            continue

        if 'run%d' % run in f_out.keys():
            print("already seen this run, skip!")
            continue

        ##### load the mask used for this run
        f_mask = h5py.File(os.path.join(args.mask_dir, 'run%d.tbl' % run), 'r')

        mask = f_mask['polar_mask_binned'].value
        mask = (mask == mask.max())
        # do the mask cor
        qs = np.linspace(0, 1, mask.shape[0])
        dc = DiffCorr(mask[None, :, :], qs, 0, pre_dif=True)
        mask_cor = dc.autocorr().mean(0)

        f_mask.close()

        f_out.create_group('run%d' % run)

        all_ave_cors = []

        all_nums = []

        for qidx in range(35):
            print('run%d q%d' % (run, qidx))
            if 'num_pca_cutoff2' in f['q%d' % qidx].keys():
                pca_num = f['q%d' % qidx]['num_pca_cutoff2'].value
            else:
                pca_num = f['q%d' % qidx]['num_pca_cutoff'].value
Example #7
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_shots * 0.1)  # use 10% of the shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()

    max_pca_components = []

    max_pca_components.append(int(num_pca_components[qidx]))
    if 'num_pca_cutoff' in f_out[q_group].keys():
        max_pca_components.append(f_out[q_group]['num_pca_cutoff'].value)
    max_pca_components = list(set(max_pca_components))

    if 'pca_components' not in f_out[q_group].keys():
        # if there is no pca component saved, then run it and save the components
        pca = PCA(n_components=50, whiten=False)

        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)
        if 'explained_variance_ratio' not in f_out[q_group].keys():
Example #8
    f_out.create_group('q%d' % qidx)
    shots = PI[:, qidx, :][:, None, :]
    this_mask = mask[qidx][None, :]

    norm_shots = np.zeros_like(shots)

    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)

    print("computing single shot correlations")
    phi_offset = 10
    num_phi = norm_shots.shape[-1]
    qs = np.array([1.0])

    dc = DiffCorr(this_mask[None, :, :], qs, 0, pre_dif=True)
    mask_corr = dc.autocorr()

    dc = DiffCorr(norm_shots, qs, 0, pre_dif=True)
    corr = dc.autocorr()

    corr /= mask_corr
    corr = corr[:, :, phi_offset:num_phi // 2 - phi_offset]

    pca = PCA(n_components=args.num_pca)
    new_corr = pca.fit_transform(corr[:, 0, :])

    kmeans = KMeans(n_clusters=args.num_clusters)
    kmeans.fit(new_corr)

    f_out.create_dataset('q%d/cluster_labels' % qidx, data=kmeans.labels_)
    f_out.create_dataset('q%d/explained_variance_ratio' % qidx,
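Example #8 reduces the per-shot angular correlations with PCA and clusters the reduced coordinates with KMeans. The same two-step pipeline on synthetic data (scikit-learn; all sizes are illustrative):

import numpy as np
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans

corr = np.random.rand(100, 170)                # 100 fake shot correlations
reduced = PCA(n_components=5).fit_transform(corr)
labels = KMeans(n_clusters=3, n_init=10).fit(reduced).labels_
print(np.bincount(labels))                     # shots per cluster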
Example #9
cluster_set_keys = f_cluster.keys()

out_file = run_file.replace('.tbl', '_cor.h5')
f_out = h5py.File(os.path.join(save_dir, out_file), 'w')

if 'polar_mask_binned' in f.keys():
    mask = np.array(
        f['polar_mask_binned'].value == f['polar_mask_binned'].value.max(),
        dtype=int)
else:
    mask = np.load(
        '/reg/d/psdm/cxi/cxilp6715/scratch/water_data/binned_pmask_basic.npy')

qs = np.linspace(0.2, 0.88, mask.shape[0])
dc = DiffCorr(mask[None, :, :], qs, 0, pre_dif=True)
mask_ac = dc.autocorr()

PI = f['polar_imgs']
shot_tags = np.arange(0, PI.shape[0])

for set_key in cluster_set_keys:
    print("computing diff cor for %s..." % set_key)
    qidx = int(set_key.split('q')[1])
    labels = f_cluster[set_key]['cluster_labels'].value.astype(int)

    f_out.create_group(set_key)

    unique_labels = np.unique(labels)
    cluster_corrs = []
    cluster_num_shots = []
    for ll in unique_labels:
Example #10
    sys.exit()
out_file2 = run_file.replace('.tbl', '_chunks_intershot_uncertainty.h5')
f_out2 = h5py.File(os.path.join(save_dir, out_file2), 'w')

if 'polar_mask_binned' in f.keys():
    mask = np.array(
        f['polar_mask_binned'].value == f['polar_mask_binned'].value.max(),
        dtype=int)
else:
    print("there is no mask stored with the shots")
    sys.exit()
    # mask = np.load('/reg/d/psdm/cxi/cxilp6715/results/shared_files/binned_pmask_basic.npy')
# do the mask cor
qs = np.linspace(0, 1, mask.shape[0])
dc = DiffCorr(mask[None, :, :], qs, 0, pre_dif=True)
mask_cor = dc.autocorr().mean(0)

PI = f['polar_imgs']
# filter by photon energy: if the photon energy of a shot is not within 100 eV of the average, do not use it
photon_energy = np.nan_to_num(f['ebeam']['photon_energy'].value)
mean_E = photon_energy.mean()
E_sigma = 100.
shot_tags_to_keep = np.where((photon_energy > (mean_E - E_sigma))
                             & (photon_energy < (mean_E + E_sigma)))[0]
print('Num of shots to be used: %d' % (shot_tags_to_keep.size))

# figure which qs are used for pairing
qmin = args.qmin
qmax = args.qmax

if qmax is None:
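The energy filter keeps only shots whose photon energy falls within E_sigma of the run average. That window selection in isolation, with made-up energies:

import numpy as np

photon_energy = np.array([9480., 9500., 9510., 9700., 9495.])  # fake eV values
mean_E = photon_energy.mean()
E_sigma = 100.
keep = np.where((photon_energy > mean_E - E_sigma)
                & (photon_energy < mean_E + E_sigma))[0]
print(keep)  # indices of shots inside the +/- 100 eV window -> [0 1 2 4]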
Example #11
        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)

        #### this is just for saving to get error bars
        if save_cors:
            grp = f_out['q%d' % qidx]
            nn = grp['num_pca_cutoff'].value
            if 'all_difcors' in grp['pca%d' % nn].keys():
                print('already saved dif cors for this cutoff (%d) at q%d' % (nn, qidx))
            else:

                Train_noise = new_Train[:, :nn].dot(components[:nn])
                denoise_Train = reshape_unmasked_values_to_shots(
                    norm_shots - Train_noise - Train.mean(0)[None, :], partial_mask)

                dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=False)
                Train_difcor = dc.autocorr()

                f_out.create_dataset('%s/pca%d/all_train_difcors' % (cluster_group, nn),
                                     data=Train_difcor)
            del norm_shots
            continue

        # denoise
        for nn in range(1, max_pca):
            pca_group = '%s/pca%d' % (cluster_group, nn)
            if 'pca%d' % nn in f_out[cluster_group].keys():
                print("pca denoise at pca n_components = %d is already done. Skip!" % nn)
                continue

            if nn > 0:
Example #12
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_shots * 0.1)  # use 10% of the shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()
    num_pca = int(num_pca_components[qidx])

    if num_pca > 0:
        # do PCA stuff
        print('denoising with PCA num_pca_components = %d...' % num_pca)
        pca = PCA(n_components=num_pca, whiten=False)
        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)

        # get back the masked images and components
        components = pca.components_
        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)
        masked_mean_test = reshape_unmasked_values_to_shots(
            Test, partial_mask).mean(0)

        # denoise
Example #13
        grp = f_out['q%d' % qidx]
        nn = grp['num_pca_cutoff'].value
        if 'all_test_difcors' in grp['pca%d' % nn].keys():
            print('already saved dif cors for this cutoff (%d) at q%d' %
                  (nn, qidx))
        else:

            Test_noise = new_Test[:, :nn].dot(components[:nn])
            denoise_Test = reshape_unmasked_values_to_shots(
                Test - Test_noise - Test.mean(0)[None, :], partial_mask)
            Train_noise = new_Train[:, :nn].dot(components[:nn])
            denoise_Train = reshape_unmasked_values_to_shots(
                Train - Train_noise - Train.mean(0)[None, :], partial_mask)

            dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=False)
            Train_difcor = dc.autocorr()

            dc = DiffCorr(denoise_Test, qvalues, 0, pre_dif=False)
            Test_difcor = dc.autocorr()

            f_out.create_dataset('q%d/pca%d/all_test_difcors' % (qidx, nn),
                                 data=Test_difcor)
            f_out.create_dataset('q%d/pca%d/all_train_difcors' % (qidx, nn),
                                 data=Train_difcor)

        del shots
        del norm_shots

        continue

    # denoise
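The denoising pattern used throughout these examples projects the shots onto the leading nn principal components, treats that projection as noise, and subtracts it together with the mean. A compact sketch of the projection-and-subtract step (scikit-learn; shapes and the nn value are illustrative):

import numpy as np
from sklearn.decomposition import PCA

X = np.random.rand(200, 64)                     # fake flattened shots
pca = PCA(n_components=10).fit(X)
scores = pca.transform(X)
nn = 4                                          # leading components treated as noise
noise = scores[:, :nn].dot(pca.components_[:nn])
denoised = X - noise - X.mean(0)[None, :]       # residual in the remaining components
print(denoised.shape)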
Example #14
        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)

        # denoise

        Train_noise = new_Train[:, :num_pca].dot(components[:num_pca])
        denoise_Train = reshape_unmasked_values_to_shots(
            Train - Train_noise - Train.mean(0)[None, :], partial_mask)

        if denoise_Train.shape[0] % 2 > 0:
            denoise_Train = denoise_Train[:-1]

        denoise_Train_diff = denoise_Train[:-1][::2] - denoise_Train[1:][::2]
        dc = DiffCorr(denoise_Train_diff, qvalues, 0, pre_dif=True)
        Train_difcor = dc.autocorr().mean(0)[0]

        all_corrs.append(Train_difcor)
        all_nums.append(Train.shape[0])

        f_out.create_dataset('q%d/dif_cor%d' % (qidx, n_chunk),
                             data=Train_difcor)

        #########do clustering with corr PCA clustering#########
        print("computing single shot correlations")
        phi_offset = 10
        num_phi = denoise_Train.shape[-1]

        mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
        mask_cor = mask_dc.autocorr()
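After denoising, consecutive shots are paired off (even index minus the following odd index) to build difference images for the correlation. That interleaved differencing on a toy stack:

import numpy as np

shots = np.arange(12).reshape(6, 2).astype(float)  # 6 toy "shots"
if shots.shape[0] % 2 > 0:
    shots = shots[:-1]                             # drop the last shot if the count is odd
diffs = shots[:-1][::2] - shots[1:][::2]
print(diffs)  # shot0-shot1, shot2-shot3, shot4-shot5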
Example #15
        # get back the masked images and components

        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)

        # denoise

        Train_noise = new_Train[:, :num_pca].dot(components[:num_pca])
        denoise_Train = reshape_unmasked_values_to_shots(
            Train - Train_noise - Train.mean(0)[None, :], partial_mask)

        if denoise_Train.shape[0] % 2 > 0:
            denoise_Train = denoise_Train[:-1]
        denoise_Train = denoise_Train[:-1][::2] - denoise_Train[1:][::2]
        dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=True)
        Train_difcor = dc.autocorr().mean(0)[0]

        all_corrs.append(Train_difcor)
        all_nums.append(Train.shape[0])

        f_out.create_dataset('q%d/dif_cor%d' % (qidx, n_chunk),
                             data=Train_difcor)

    all_corrs = np.array(all_corrs)
    all_nums = np.array(all_nums)
    # print all_corrs.shape
    # print all_nums

    ave_corr = np.sum(all_corrs * (all_nums / float(all_nums.sum()))[:, None],
                      axis=0)
    # print ave_corr.shape
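The per-chunk correlations are then combined as a weighted average, each chunk weighted by the number of shots it contributed. The same reduction in isolation (NumPy; chunk counts are made up):

import numpy as np

all_corrs = np.random.rand(3, 354)       # correlations from 3 fake chunks
all_nums = np.array([100, 80, 120])      # shots per chunk
weights = all_nums / float(all_nums.sum())
ave_corr = np.sum(all_corrs * weights[:, None], axis=0)
print(ave_corr.shape)                    # (354,)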
Example #16
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_shots * 0.1)  # use 10% of the shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()
    if args.num_pca > 0:
        # do PCA stuff
        print('denoising with PCA...')
        pca = PCA(n_components=args.num_pca, whiten=False)
        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)

        # get back the masked images and components
        components = pca.components_
        masked_mean_train = reshape_unmasked_values_to_shots(
            Train, partial_mask).mean(0)
        masked_mean_test = reshape_unmasked_values_to_shots(
            Test, partial_mask).mean(0)

        # denoise
Example #17
cluster_file = run_file.replace('.tbl', '_PCA-cluster.h5')
f_cluster = h5py.File(os.path.join(cluster_dir, cluster_file), 'r')
cluster_set_keys = f_cluster.keys()

out_file = run_file.replace('.tbl', '_cor.h5')
f_out = h5py.File(os.path.join(save_dir, out_file), 'w')

if 'polar_mask_binned' in f.keys():
    mask = np.array(
        f['polar_mask_binned'].value == f['polar_mask_binned'].value.max(),
        dtype=int)
else:
    mask = np.load(
        '/reg/d/psdm/cxi/cxilp6715/scratch/water_data/binned_pmask_basic.npy')

qs = np.linspace(0.2, 0.88, mask.shape[0])
dc = DiffCorr(mask[None, :, :], qs, 0, pre_dif=True)
mask_ac = dc.autocorr()

PI = f['polar_imgs']
shot_tags = np.arange(0, PI.shape[0])

for set_key in cluster_set_keys:
    print("computing diff cor for %s..." % set_key)
    qidx = int(set_key.split('q')[1])
    labels = f_cluster[set_key]['cluster_labels'].value.astype(int)

    f_out.create_group(set_key)

    unique_labels = np.unique(labels)
    cluster_corrs = []
    cluster_num_shots = []
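Examples #9 and #17 then walk the unique cluster labels and gather the shots belonging to each cluster before correlating them. A minimal sketch of that per-label selection (NumPy only; the labels are synthetic):

import numpy as np

labels = np.array([0, 1, 0, 2, 1, 0])       # fake cluster labels
shot_tags = np.arange(labels.size)
for ll in np.unique(labels):
    cluster_tags = shot_tags[labels == ll]  # shots assigned to cluster ll
    print(ll, cluster_tags)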
Example #18
    if args.save:
        grp = f_out['q%d' % qidx]
        nn = grp['num_pca_cutoff'].value
        if 'all_test_difcors' in grp['pca%d' % nn].keys():
            print('already saved dif cors for this cutoff (%d) at q%d' % (nn, qidx))
        else:

            Test_noise = new_Test[:, :nn].dot(components[:nn])
            denoise_Test = reshape_unmasked_values_to_shots(
                Test - Test_noise - Test.mean(0)[None, :], partial_mask)
            Train_noise = new_Train[:, :nn].dot(components[:nn])
            denoise_Train = reshape_unmasked_values_to_shots(
                Train - Train_noise - Train.mean(0)[None, :], partial_mask)

            dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=False)
            Train_difcor = dc.autocorr()

            dc = DiffCorr(denoise_Test, qvalues, 0, pre_dif=False)
            Test_difcor = dc.autocorr()

            f_out.create_dataset('q%d/pca%d/all_test_difcors' % (qidx, nn),
                                 data=Test_difcor)
            f_out.create_dataset('q%d/pca%d/all_train_difcors' % (qidx, nn),
                                 data=Train_difcor)

        del shots
        del norm_shots

        continue
    
Example #19
                nn = grp['num_pca_cutoff2'].value
            else:
                nn = grp['num_pca_cutoff'].value
            if 'all_test_difcors' in grp['pca%d' % nn].keys():
                print('already saved dif cors for this cutoff (%d) at q%d' % (nn, qidx))
            else:

                Test_noise = new_Test[:, :nn].dot(components[:nn])
                denoise_Test = reshape_unmasked_values_to_shots(
                    Test - Test_noise - Test.mean(0)[None, :], partial_mask)
                Train_noise = new_Train[:, :nn].dot(components[:nn])
                denoise_Train = reshape_unmasked_values_to_shots(
                    Train - Train_noise - Train.mean(0)[None, :], partial_mask)

                dc = DiffCorr(denoise_Train, qvalues, 0, pre_dif=False)
                Train_difcor = dc.autocorr()

                dc = DiffCorr(denoise_Test, qvalues, 0, pre_dif=False)
                Test_difcor = dc.autocorr()

                f_out.create_dataset('q%d/pca%d/all_test_difcors' % (qidx, nn),
                                     data=Test_difcor)
                f_out.create_dataset('q%d/pca%d/all_train_difcors' % (qidx, nn),
                                     data=Train_difcor)

            del norm_shots
        else:

            # denoise
Example #20
# output file to save data
out_file = run_file.replace('.tbl', '_pca0.h5')
f_out = h5py.File(os.path.join(save_dir, out_file), 'w')

if 'polar_mask_binned' in f.keys():
    mask = np.array(
        f['polar_mask_binned'].value == f['polar_mask_binned'].value.max(),
        dtype=int)
else:
    print("there is no mask stored with the shots")
    sys.exit()
    # mask = np.load('/reg/d/psdm/cxi/cxilp6715/results/shared_files/binned_pmask_basic.npy')
qs = np.linspace(0, 1, mask.shape[0])
dc = DiffCorr(mask[None, :, :], qs, 0, pre_dif=True)
mask_cor = dc.autocorr().mean(0)

PI = f['polar_imgs']
# filter by photon energy: if the photon energy of a shot is not within 100 eV of the average, do not use it
photon_energy = np.nan_to_num(f['ebeam']['photon_energy'].value)
mean_E = photon_energy.mean()
E_sigma = 100.
shot_tags_to_keep = np.where((photon_energy > (mean_E - E_sigma))
                             & (photon_energy < (mean_E + E_sigma)))[0]
print('Num of shots to be used: %d' % (shot_tags_to_keep.size))

# figure which qs are used for pairing
qmin = args.qmin
qmax = args.qmax

if qmax is None:
Example #21
    f_out.create_group('q%d' % qidx)
    shots = PI[:, qidx, :][:, None, :]
    this_mask = mask[qidx][None, :]

    norm_shots = np.zeros_like(shots)

    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)

    print("computing single shot correlations")
    phi_offset = 10
    num_phi = norm_shots.shape[-1]
    qs = np.array([1.0])

    dc = DiffCorr(this_mask[None, :, :], qs, 0, pre_dif=True)
    mask_corr = dc.autocorr()

    dc = DiffCorr(norm_shots, qs, 0, pre_dif=True)
    corr = dc.autocorr()

    corr /= mask_corr
    corr = corr[:, :, phi_offset:num_phi // 2 - phi_offset]

    pca = PCA(n_components=args.num_pca)
    new_corr = pca.fit_transform(corr[:, 0, :])

    kmeans = KMeans(n_clusters=args.num_clusters)
    kmeans.fit(new_corr)

    f_out.create_dataset('q%d/cluster_labels' % qidx, data=kmeans.labels_)
Example #22
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_shots * 0.1)  # use 10% of the shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()
    if args.num_pca is None:
        num_pca = int(num_pca_components[qidx])
        max_pca = num_pca + 5
    else:
        num_pca = args.num_pca + 1
        max_pca = args.num_pca + 1

    print('denoising with PCA critical num_pca_components = %d...' % num_pca)
    if 'pca_components' not in f_out[q_group].keys():
        # if there are no saved PCA components, run PCA and save the components
        pca = PCA(n_components=50, whiten=False)

        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)
        if 'explained_variance_ratio' not in f_out[q_group].keys():
Example #23
    for idx, ss in enumerate(shots):
        norm_shots[idx] = normalize_shot(ss, this_mask)
    # do we want to normalize by the entire range of intensity?
    # divide into Train and test
    num_shots = norm_shots.shape[0]
    cutoff = int(num_shots * 0.1)  # use 10% of the shots as testing set
    partial_mask = this_mask.copy()
    Train = norm_shots[cutoff:, partial_mask == 1]
    Test = norm_shots[:cutoff, partial_mask == 1]

    print("%d test shots" % (Test.shape[0]))
    print("%d train shots" % (Train.shape[0]))

    qvalues = np.linspace(0, 1, partial_mask.shape[0])
    mask_dc = DiffCorr(partial_mask, qvalues, 0, pre_dif=True)
    mask_cor = mask_dc.autocorr()
    if args.num_pca is None:
        num_pca = int(num_pca_components[qidx])
        max_pca = num_pca + 5
    else:
        num_pca = args.num_pca + 1
        max_pca = args.num_pca + 1

    print('denoising with PCA critical num_pca_components = %d...' % num_pca)
    if 'pca_components' not in f_out[q_group].keys():
        # if there are no saved PCA components, run PCA and save the components
        pca = PCA(n_components=50, whiten=False)

        new_Train = pca.fit_transform(Train)
        new_Test = pca.transform(Test)
        if 'explained_variance_ratio' not in f_out[q_group].keys():