import h5py
import numpy as np
from collections import Counter
#cu is assumed to be the project's custom utilities module (provides get_output_path and spherematch)


def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/yang_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/mpa_dr7/'
    #save data to directory...
    savepath1 = filepath_cat1+'mpa_dr7_match/'
    savepath2 = filepath_cat2+'yang_groupcat_match/'
    #################################################################

    catalogues_1=['sample1_L_petro','sample2_L_petro','sample3_L_petro',\
                  'sample1_M_petro','sample2_M_petro','sample3_M_petro',\
                  'sample1_L_model','sample2_L_model','sample3_L_model',\
                  'sample1_M_model','sample2_M_model','sample3_M_model']
    catalogues_2=['gal_info_gal_totspecsfr_dr7_v5_2']


    for catalogue in catalogues_1:
        catalogue1 = catalogue
        catalogue2 = catalogues_2[0]
        print(catalogue1, 'match into', catalogue2)
    
        f1 =  h5py.File(filepath_cat1+catalogue1+'.hdf5', 'r')  #open catalogue file
        GC = f1.get(catalogue1)

        f2 =  h5py.File(filepath_cat2+catalogue2+'.hdf5', 'r')  #open catalogue file
        W = f2.get(catalogue2)

        da=2.0*1.0/3600.0 #matching length of 2" in degrees
        result_1 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=1))
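        #assumed convention of cu.spherematch: result_1[0] holds indices into catalogue1,
        #result_1[1] indices into catalogue2, and result_1[2] the angular separation of each matched pair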
        #result_2 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=2)) #not used
        repeats = [item for item, count in Counter(result_1[1]).items() if count > 1] #objects in catalogue2 matched more than once
        if len(repeats) > 0:
            remove = np.zeros((0,), dtype=int) #entries of the match list to remove
            for repeat in repeats:
                result_a = np.where(result_1[1]==repeat)[0] #match-list entries pointing at this catalogue2 object
                result_b = np.where(result_1[2][result_a]>np.min(result_1[2][result_a]))[0] #all but the closest match
                result_c = result_a[result_b] #match-list entries of the less good matches
                remove = np.hstack((remove,result_c)) #entries which should be removed
            keep = np.arange(0,len(result_1[0]),1).astype(int)
            keep = ~np.in1d(keep,remove)
            result = result_1[:,keep]
            #unique = np.setdiff1d(result_2[0], result_1[0]) #not used
        else:
            result = result_1 #no duplicates: keep all matches

        filename1 = catalogue2+'_'+catalogue1+'_match'
        filename2 = catalogue1+'_'+catalogue2+'_match'

        np.save(savepath1+filename1, result[1].astype(int))
        np.save(savepath2+filename2, result[0].astype(int))
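
#Hypothetical usage sketch (not part of the original scripts): the two index files saved
#above are paired row-for-row, so they can be used to line the two catalogues up side by
#side.  Paths, catalogue names and column names follow the loop above; np.save appends
#'.npy', so the extension is added back for np.load.
def example_use_match_files():
    catalogue1 = 'sample1_L_petro'
    catalogue2 = 'gal_info_gal_totspecsfr_dr7_v5_2'
    filepath_cat1 = cu.get_output_path() + 'processed_data/yang_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/mpa_dr7/'

    #indices into catalogue2, one per matched catalogue1 galaxy, and vice versa
    idx_into_cat2 = np.load(filepath_cat1 + 'mpa_dr7_match/' +
                            catalogue2 + '_' + catalogue1 + '_match.npy')
    idx_into_cat1 = np.load(filepath_cat2 + 'yang_groupcat_match/' +
                            catalogue1 + '_' + catalogue2 + '_match.npy')

    GC = np.array(h5py.File(filepath_cat1 + catalogue1 + '.hdf5', 'r').get(catalogue1))
    W = np.array(h5py.File(filepath_cat2 + catalogue2 + '.hdf5', 'r').get(catalogue2))

    #the k-th matched pair is GC[idx_into_cat1[k]] <-> W[idx_into_cat2[k]]
    print(GC['RAgal'][idx_into_cat1][:5])
    print(W['RA'][idx_into_cat2][:5])
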
def main():
    
    ###make sure to change these when running in a new environment!###
    #location of data directories
    filepath_1 = cu.get_output_path() + 'processed_data/mpa_dr7/'
    filepath_2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #################################################################

    catalogue_1 = 'mpa_dr7_unique'
    catalogue_2 = 'nyu_vagc_dr7'
  
    f_1 =  h5py.File(filepath_1+catalogue_1+'.hdf5', 'r')
    dset_1 = f_1.get(catalogue_1)
    dset_1 = np.array(dset_1)
    #print(dset_1.dtype.names)

    f_2 =  h5py.File(filepath_2+catalogue_2+'.hdf5', 'r')
    dset_2 = f_2.get(catalogue_2)
    dset_2 = np.array(dset_2)
    #print(dset_2.dtype.names)
    
    print("number of objects in MPA-JHU catalogue: {0}".format(len(dset_1)))
    print("number of objects in NYU catalogue: {0}".format(len(dset_2)))
    
    da = 1/3600.0 * 2.0 #match length is 2"
    ind1, ind2, ds = cu.spherematch(dset_1['RA'], dset_1['DEC'],\
                                    dset_2['RA'], dset_2['DEC'],\
                                    tol=da, nnearest=1)
    
    print("minimum angular separation: {0}''".format(np.min(ds)*3600.0))
    print("maximum angular separation: {0}''".format(np.max(ds)*3600.0))
    print("number of matches: {0}".format(len(ind1)))
    
    print(dset_1[ind1]['Z'])
    print(dset_2[ind2]['Z'])
    
    #save matching file
    filename = 'matches_into_nyu_vagc'
    np.save(filepath_1+'matches/'+filename,ind1)
    filename = 'nyu_vagc_matched_to_mpa'
    np.save(filepath_1+'matches/'+filename,ind2) #indices into the NYU catalogue, paired row-for-row with ind1 above
def main():

    ###make sure to change these when running in a new environment!###
    #location of data directories
    filepath_1 = cu.get_output_path() + 'processed_data/mpa_dr7/'
    filepath_2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #################################################################

    catalogue_1 = 'mpa_dr7_unique'
    catalogue_2 = 'nyu_vagc_dr7'

    f_1 = h5py.File(filepath_1 + catalogue_1 + '.hdf5', 'r')
    dset_1 = f_1.get(catalogue_1)
    dset_1 = np.array(dset_1)
    #print(dset_1.dtype.names)

    f_2 = h5py.File(filepath_2 + catalogue_2 + '.hdf5', 'r')
    dset_2 = f_2.get(catalogue_2)
    dset_2 = np.array(dset_2)
    #print(dset_2.dtype.names)

    print("number of objects in MPA-JHU catalogue: {0}".format(len(dset_1)))
    print("number of objects in NYU catalogue: {0}".format(len(dset_2)))

    da = 1 / 3600.0 * 2.0  #match length is 2"
    ind1, ind2, ds = cu.spherematch(dset_1['RA'], dset_1['DEC'],\
                                    dset_2['RA'], dset_2['DEC'],\
                                    tol=da, nnearest=1)

    print("minimum angular separation: {0}''".format(np.min(ds) * 3600.0))
    print("maximum angular separation: {0}''".format(np.max(ds) * 3600.0))
    print("number of matches: {0}".format(len(ind1)))

    print(dset_1[ind1]['Z'])
    print(dset_2[ind2]['Z'])

    #save matching file
    filename = 'matches_into_nyu_vagc'
    np.save(filepath_1 + 'matches/' + filename, ind1)
    filename = 'nyu_vagc_matched_to_mpa'
    np.save(filepath_1 + 'matches/' + filename, ind2)  #indices into the NYU catalogue, paired row-for-row with ind1 above
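
#Hypothetical follow-up check (not in the original script): both catalogues carry a
#redshift column 'Z', so the matched pairs printed above can be compared quantitatively.
#The 0.001 threshold is only an illustrative choice.
def example_check_redshifts(dset_1, dset_2, ind1, ind2):
    dz = np.abs(dset_1['Z'][ind1] - dset_2['Z'][ind2])
    print("fraction of matches with |dz|<0.001: {0}".format(np.mean(dz < 0.001)))
    print("median |dz|: {0}".format(np.median(dz)))
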
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/yang_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #save data to directory...
    savepath1 = filepath_cat1+'nyu_vagc_match/'
    savepath2 = filepath_cat2+'yang_groupcat_match/'
    #################################################################

    catalogues_1 = ['sample1_L_petro','sample2_L_petro','sample3_L_petro',\
                    'sample1_M_petro','sample2_M_petro','sample3_M_petro',\
                    'sample1_L_model','sample2_L_model','sample3_L_model',\
                    'sample1_M_model','sample2_M_model','sample3_M_model']
    catalogues_2 = ['nyu_vagc_dr7']


    for catalogue in catalogues_1:
        catalogue1 = catalogue
        catalogue2 = catalogues_2[0]
        print(catalogue1, 'match into', catalogue2)
    
        f1 =  h5py.File(filepath_cat1+catalogue1+'.hdf5', 'r')  #open catalogue file
        GC = f1.get(catalogue1)

        f2 =  h5py.File(filepath_cat2+catalogue2+'.hdf5', 'r')  #open catalogue file
        W = f2.get(catalogue2)

        da=2.0*1.0/3600.0 #matching length in degrees
        result_1 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=1))
        #result_2 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=2)) #not used
        repeats = [item for item, count in Counter(result_1[1]).items() if count > 1] #objects in catalogue2 matched more than once
        if len(repeats) > 0:
            remove = np.zeros((0,), dtype=int) #entries of the match list to remove
            for repeat in repeats:
                result_a = np.where(result_1[1]==repeat)[0] #match-list entries pointing at this catalogue2 object
                result_b = np.where(result_1[2][result_a]>np.min(result_1[2][result_a]))[0] #all but the closest match
                result_c = result_a[result_b] #match-list entries of the less good matches
                remove = np.hstack((remove,result_c)) #entries which should be removed
            keep = np.arange(0,len(result_1[0]),1).astype(int)
            keep = ~np.in1d(keep,remove)
            result = result_1[:,keep]
            #unique = np.setdiff1d(result_2[0], result_1[0]) #not used
        else:
            result = result_1 #no duplicates: keep all matches
        
        '''
        x = cu.spheredist(GC['RAgal'],GC['DECgal'],W['RA'][nyu_id],W['DEC'][nyu_id])
        bins = np.arange(0,0.1,da)
        h, bins = np.histogram(x,bins=bins)
        bins = bins*3600.0
        print sum(h[1:])
        print len(GC)-len(result_1[0])
        plt.plot(bins[:-1],h[:])
        plt.yscale('log')
        plt.xlabel('da (deg)')
        plt.ylabel('N')
        plt.title(catalogue1)
        plt.show(block=False)
        '''

        filename1 = catalogue2+'_'+catalogue1+'_match'
        filename2 = catalogue1+'_'+catalogue2+'_match'

        np.save(savepath1+filename1, result[1].astype(int))
        np.save(savepath2+filename2, result[0].astype(int))
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/yang_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/mpa_dr7/'
    #save data to directory...
    savepath1 = filepath_cat1 + 'mpa_dr7_match/'
    savepath2 = filepath_cat2 + 'yang_groupcat_match/'
    #################################################################

    catalogues_1=['sample1_L_petro','sample2_L_petro','sample3_L_petro',\
                  'sample1_M_petro','sample2_M_petro','sample3_M_petro',\
                  'sample1_L_model','sample2_L_model','sample3_L_model',\
                  'sample1_M_model','sample2_M_model','sample3_M_model']
    catalogues_2 = ['gal_info_gal_totspecsfr_dr7_v5_2']

    for catalogue in catalogues_1:
        catalogue1 = catalogue
        catalogue2 = catalogues_2[0]
        print(catalogue1, 'match into', catalogue2)

        f1 = h5py.File(filepath_cat1 + catalogue1 + '.hdf5',
                       'r')  #open catalogue file
        GC = f1.get(catalogue1)

        f2 = h5py.File(filepath_cat2 + catalogue2 + '.hdf5',
                       'r')  #open catalogue file
        W = f2.get(catalogue2)

        da = 2.0 * 1.0 / 3600.0  #matching length
        result_1 = np.array(
            cu.spherematch(GC['RAgal'],
                           GC['DECgal'],
                           W['RA'],
                           W['DEC'],
                           tol=da,
                           nnearest=1))
        #result_2 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=2)) #not used
        #objects in catalogue2 that were matched more than once
        repeats = [item for item, count in Counter(result_1[1]).items() if count > 1]
        if len(repeats) > 0:
            remove = np.zeros((0,), dtype=int)  #entries of the match list to remove
            for repeat in repeats:
                result_a = np.where(result_1[1] == repeat)[0]  #match-list entries pointing at this catalogue2 object
                result_b = np.where(result_1[2][result_a] > np.min(result_1[2][result_a]))[0]  #all but the closest match
                result_c = result_a[result_b]  #match-list entries of the less good matches
                remove = np.hstack((remove, result_c))  #entries which should be removed
            keep = np.arange(0, len(result_1[0]), 1).astype(int)
            keep = ~np.in1d(keep, remove)
            result = result_1[:, keep]
            #unique = np.setdiff1d(result_2[0], result_1[0]) #not used
        else:
            result = result_1  #no duplicates: keep all matches

        filename1 = catalogue2 + '_' + catalogue1 + '_match'
        filename2 = catalogue1 + '_' + catalogue2 + '_match'

        np.save(savepath1 + filename1, result[1].astype(int))
        np.save(savepath2 + filename2, result[0].astype(int))
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/yang_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #save data to directory...
    savepath1 = filepath_cat1 + 'nyu_vagc_match/'
    savepath2 = filepath_cat2 + 'yang_groupcat_match/'
    #################################################################

    catalogues_1 = ['sample1_L_petro','sample2_L_petro','sample3_L_petro',\
                    'sample1_M_petro','sample2_M_petro','sample3_M_petro',\
                    'sample1_L_model','sample2_L_model','sample3_L_model',\
                    'sample1_M_model','sample2_M_model','sample3_M_model']
    catalogues_2 = ['nyu_vagc_dr7']

    for catalogue in catalogues_1:
        catalogue1 = catalogue
        catalogue2 = catalogues_2[0]
        print(catalogue1, 'match into', catalogue2)

        f1 = h5py.File(filepath_cat1 + catalogue1 + '.hdf5',
                       'r')  #open catalogue file
        GC = f1.get(catalogue1)

        f2 = h5py.File(filepath_cat2 + catalogue2 + '.hdf5',
                       'r')  #open catalogue file
        W = f2.get(catalogue2)

        da = 2.0 * 1.0 / 3600.0  #matching length in degrees
        result_1 = np.array(
            cu.spherematch(GC['RAgal'],
                           GC['DECgal'],
                           W['RA'],
                           W['DEC'],
                           tol=da,
                           nnearest=1))
        #result_2 = np.array(cu.spherematch(GC['RAgal'], GC['DECgal'], W['RA'], W['DEC'], tol=da, nnearest=2)) #not used
        #objects in catalogue2 that were matched more than once
        repeats = [item for item, count in Counter(result_1[1]).items() if count > 1]
        if len(repeats) > 0:
            remove = np.zeros((0,), dtype=int)  #entries of the match list to remove
            for repeat in repeats:
                result_a = np.where(result_1[1] == repeat)[0]  #match-list entries pointing at this catalogue2 object
                result_b = np.where(result_1[2][result_a] > np.min(result_1[2][result_a]))[0]  #all but the closest match
                result_c = result_a[result_b]  #match-list entries of the less good matches
                remove = np.hstack((remove, result_c))  #entries which should be removed
            keep = np.arange(0, len(result_1[0]), 1).astype(int)
            keep = ~np.in1d(keep, remove)
            result = result_1[:, keep]
            #unique = np.setdiff1d(result_2[0], result_1[0]) #not used
        else:
            result = result_1  #no duplicates: keep all matches
        '''
        x = cu.spheredist(GC['RAgal'],GC['DECgal'],W['RA'][nyu_id],W['DEC'][nyu_id])
        bins = np.arange(0,0.1,da)
        h, bins = np.histogram(x,bins=bins)
        bins = bins*3600.0
        print sum(h[1:])
        print len(GC)-len(result_1[0])
        plt.plot(bins[:-1],h[:])
        plt.yscale('log')
        plt.xlabel('da (deg)')
        plt.ylabel('N')
        plt.title(catalogue1)
        plt.show(block=False)
        '''

        filename1 = catalogue2 + '_' + catalogue1 + '_match'
        filename2 = catalogue1 + '_' + catalogue2 + '_match'

        np.save(savepath1 + filename1, result[1].astype(int))
        np.save(savepath2 + filename2, result[0].astype(int))
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/Berland_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #save data to directory...
    savepath1 = filepath_cat1 + 'nyu_vagc_match/'
    savepath2 = filepath_cat2 + 'berland_groupcat_match/'
    #################################################################

    catalogues_1 = ['mr19_groups', 'smthresh10.2.groups', 'smthresh9.8.groups']
    catalogues_2 = ['nyu_vagc_dr7']

    catalogue1 = catalogues_1[0]
    catalogue2 = catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)

    f1 = h5py.File(filepath_cat1 + catalogue1 + '.hdf5',
                   'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 = h5py.File(filepath_cat2 + catalogue2 + '.hdf5',
                   'r')  #open catalogue file
    W = f2.get(catalogue2)

    da = 2.0 * 1.0 / 3600.0  #matching length
    result = cu.spherematch(GC['RA'],
                            GC['DEC'],
                            W['RA'],
                            W['DEC'],
                            tol=da,
                            nnearest=1)

    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats) > 0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0]) == len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2 + '_' + catalogue1 + '_match'
    filename2 = catalogue1 + '_' + catalogue2 + '_match'

    np.save(savepath1 + filename1, result[1])
    np.save(savepath2 + filename2, result[0])

    catalogue1 = catalogues_1[1]
    catalogue2 = catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)

    f1 = h5py.File(filepath_cat1 + catalogue1 + '.hdf5',
                   'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 = h5py.File(filepath_cat2 + catalogue2 + '.hdf5',
                   'r')  #open catalogue file
    W = f2.get(catalogue2)

    da = 2.0 * 1.0 / 3600.0  #matching length
    result = cu.spherematch(GC['ra'],
                            GC['dec'],
                            W['RA'],
                            W['DEC'],
                            tol=da,
                            nnearest=1)

    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats) > 0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0]) == len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2 + '_' + catalogue1 + '_match'
    filename2 = catalogue1 + '_' + catalogue2 + '_match'

    np.save(savepath1 + filename1, result[1])
    np.save(savepath2 + filename2, result[0])

    catalogue1 = catalogues_1[2]
    catalogue2 = catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)

    f1 = h5py.File(filepath_cat1 + catalogue1 + '.hdf5',
                   'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 = h5py.File(filepath_cat2 + catalogue2 + '.hdf5',
                   'r')  #open catalogue file
    W = f2.get(catalogue2)

    da = 2.0 * 1.0 / 3600.0  #matching length
    result = cu.spherematch(GC['ra'],
                            GC['dec'],
                            W['RA'],
                            W['DEC'],
                            tol=da,
                            nnearest=1)

    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats) > 0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0]) == len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2 + '_' + catalogue1 + '_match'
    filename2 = catalogue1 + '_' + catalogue2 + '_match'

    np.save(savepath1 + filename1, result[1])
    np.save(savepath2 + filename2, result[0])
def main():
    ###make sure to change these when running in a new environment!###
    #location of data directory
    filepath_cat1 = cu.get_output_path() + 'processed_data/Berland_groupcat/'
    filepath_cat2 = cu.get_output_path() + 'processed_data/NYU_VAGC/'
    #save data to directory...
    savepath1 = filepath_cat1+'nyu_vagc_match/'
    savepath2 = filepath_cat2+'berland_groupcat_match/'
    #################################################################

    catalogues_1=['mr19_groups', 'smthresh10.2.groups', 'smthresh9.8.groups']
    catalogues_2=['nyu_vagc_dr7']

    catalogue1=catalogues_1[0]
    catalogue2=catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)
    
    f1 =  h5py.File(filepath_cat1+catalogue1+'.hdf5', 'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 =  h5py.File(filepath_cat2+catalogue2+'.hdf5', 'r')  #open catalogue file
    W = f2.get(catalogue2)

    da=2.0*1.0/3600.0 #matching length
    result = cu.spherematch(GC['RA'], GC['DEC'], W['RA'], W['DEC'], tol=da, nnearest=1)
    
    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats)>0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0])==len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2+'_'+catalogue1+'_match'
    filename2 = catalogue1+'_'+catalogue2+'_match'

    np.save(savepath1+filename1, result[1])
    np.save(savepath2+filename2, result[0])

    catalogue1=catalogues_1[1]
    catalogue2=catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)
    
    f1 =  h5py.File(filepath_cat1+catalogue1+'.hdf5', 'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 =  h5py.File(filepath_cat2+catalogue2+'.hdf5', 'r')  #open catalogue file
    W = f2.get(catalogue2)

    da=2.0*1.0/3600.0 #matching length
    result = cu.spherematch(GC['ra'], GC['dec'], W['RA'], W['DEC'], tol=da, nnearest=1)

    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats)>0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0])==len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2+'_'+catalogue1+'_match'
    filename2 = catalogue1+'_'+catalogue2+'_match'

    np.save(savepath1+filename1, result[1])
    np.save(savepath2+filename2, result[0])

    catalogue1=catalogues_1[2]
    catalogue2=catalogues_2[0]
    print(catalogue1, 'match into', catalogue2)
    
    f1 =  h5py.File(filepath_cat1+catalogue1+'.hdf5', 'r')  #open catalogue file
    GC = f1.get(catalogue1)

    f2 =  h5py.File(filepath_cat2+catalogue2+'.hdf5', 'r')  #open catalogue file
    W = f2.get(catalogue2)

    da=2.0*1.0/3600.0 #matching length
    result = cu.spherematch(GC['ra'], GC['dec'], W['RA'], W['DEC'], tol=da, nnearest=1)

    #check to see if anything was matched to the same object
    repeats = [item for item, count in Counter(result[1]).items() if count > 1]
    if len(repeats)>0:
        print('number of double matched objects:', len(repeats))

    #check to see if every object has a match
    if len(result[0])==len(GC):
        print('a match was found for every object.')
    else:
        print('some objects do not have matches.')

    filename1 = catalogue2+'_'+catalogue1+'_match'
    filename2 = catalogue1+'_'+catalogue2+'_match'

    np.save(savepath1+filename1, result[1])
    np.save(savepath2+filename2, result[0])
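
#A minimal sketch (not the project's cu.spherematch) of how a spherical nearest-neighbour
#match with a tolerance in degrees can be implemented with a k-d tree.  It assumes the
#same return convention used above: (idx1, idx2, ds), with the separations ds in degrees.
def example_spherematch(ra1, dec1, ra2, dec2, tol, nnearest=1):
    from scipy.spatial import cKDTree

    def unit_vectors(ra, dec):
        #convert RA/DEC in degrees to unit vectors on the sphere
        ra, dec = np.radians(ra), np.radians(dec)
        return np.column_stack((np.cos(dec) * np.cos(ra),
                                np.cos(dec) * np.sin(ra),
                                np.sin(dec)))

    xyz1 = unit_vectors(np.asarray(ra1), np.asarray(dec1))
    xyz2 = unit_vectors(np.asarray(ra2), np.asarray(dec2))

    #query the nnearest-th neighbour in catalogue 2 of every point in catalogue 1
    d, idx2 = cKDTree(xyz2).query(xyz1, k=nnearest)
    if nnearest > 1:
        d, idx2 = d[:, -1], idx2[:, -1]

    #convert 3D chord length back to angular separation in degrees
    ds = np.degrees(2.0 * np.arcsin(np.clip(d / 2.0, 0.0, 1.0)))

    #keep only pairs closer than the tolerance
    keep = ds <= tol
    idx1 = np.arange(len(xyz1))
    return idx1[keep], idx2[keep], ds[keep]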