Example #1
def map_img_chars(b, map_array):
    """ Map values of array to corresponding chars, based on map_array.
    Returns a string array. """
    
    r, c = b.shape[:2]
    
    bins = linspace(b.min(), b.max(), len(map_array), endpoint=False)
    b = digitize(b.flatten(), bins)-1
    b.shape = r, c
    
    b = array(map_array)[b]
    return b
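A minimal usage sketch (not from the original source; it assumes the unqualified linspace, digitize and array names used above come from NumPy, imported in the same module):

from numpy import linspace, digitize, array, random

chars = list(' .:-=+*#%@')              # illustrative 10-character ramp
img = random.rand(4, 8)                 # stand-in for a 2-D grayscale image
ascii_img = map_img_chars(img, chars)   # 4x8 array of single characters
print('\n'.join(''.join(row) for row in ascii_img))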
Example #2
    def get_binned_data_2d(self, n_bins = 10):

        # generating an instance of the class BinnedData2D
        bd_in = BinnedData2D()

        # setting the attribute source of the output equal to the source of the dataset
        bd_in.source = self.source

        # setting the filter of the output equal to the filter of the dataset
        bd_in.filter = self.filter

        # setting the number of bins of the output equal to the number of bins that the method receives as an input
        bd_in.n_bins = n_bins

        # Extracting pi and imp
        database_reduced = self.db_fil.loc[:,['pi','imp']]

        # Generating the bin extremes
        bin_end_imp_pi = pl.percentile(database_reduced.pi,list(100.*pl.arange(bd_in.n_bins+1.)/(bd_in.n_bins)))

        # Adjusting the last bin extreme
        bin_end_imp_pi[-1] = bin_end_imp_pi[-1] + 0.00001

        # Assigning each point to a bin
        database_reduced['fac_pi'] = pl.digitize(database_reduced.pi,bin_end_imp_pi)

        # Using a groupby in order to generate average pi and imp for each bin, assigning the output to df_imp
        df_gp = database_reduced[['pi','imp','fac_pi']].groupby('fac_pi')
        df_imp = pd.concat([df_gp.mean(),df_gp.imp.std(),df_gp.imp.count()], axis=1)
        df_imp.columns = ['pi','imp','stdd','nn']

        # Setting the data of the output equal to the result of the binning procedure
        bd_in.data = df_imp

        # returning the filled instance of the class BinnedData2D
        return bd_in
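A hypothetical call site (the name ds and its data are assumed, not part of the original code): given an instance ds of the dataset class that defines this method, with a db_fil frame holding 'pi' and 'imp' columns, the call returns a filled BinnedData2D:

bd = ds.get_binned_data_2d(n_bins=20)
print(bd.source, bd.filter, bd.n_bins)   # metadata copied over from the dataset
print(bd.data)                           # per-bin mean 'pi', mean 'imp', 'stdd', 'nn'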
Example #3
icd = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
sfr = pyl.asarray([pyl.log10(galaxy.ssfr) for galaxy in galaxies])

# plot the data
f1s1.scatter(icd, sfr, c='0.8', edgecolor='0.8', s=25, label='Data')

#plot the outliers
for i, s in zip(icd, sfr):
    if s < -10:
        pyl.scatter(i, -10, s=100, marker=None, verts=arrow_down)
    if i > 50:
        pyl.scatter(50, s, s=100, marker=None, verts=arrow_right)

bins = pyl.linspace(icd.min(), 50, 10)
delta = bins[1] - bins[0]
idx = pyl.digitize(icd, bins)
running_median = [pyl.median(sfr[idx == k]) for k in range(10)]
#upper = [scoreatpercentile(sfr[idx==k], 75) for k in range(1,7)]
#lower = [scoreatpercentile(sfr[idx==k], 25) for k in range(1,7)]

pyl.plot(bins - delta / 2, running_median, '#A60628', lw=4, label='Median')
#pyl.plot(bins-delta/2, upper, '#348ABD', '--', lw=4, label='Quartile')
#pyl.plot(bins-delta/2, lower, '#348ABD', '--', lw=4)

# add the speagle relation
from astLib.astCalc import tz

t = tz(2.25)
m = 10
sfr = (0.84 - 0.026 * t) * m - (6.51 - 0.11 * t)
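# The relation above is the Speagle et al. (2014) star-forming main sequence,
# log SFR = (0.84 - 0.026*t)*logM - (6.51 - 0.11*t), evaluated with
# t = tz(2.25) (cosmic time in Gyr at z = 2.25) and m = 10, i.e. log10(M*) = 10;
# note it reuses the name `sfr`, overwriting the data array defined earlier.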
Example #4
	inds_truepos = pylab.find(truefalse_array == 0)
	inds_trueneg = pylab.find(truefalse_array == 1)
	inds_falsepos = pylab.find(truefalse_array == 2)
	inds_falseneg = pylab.find(truefalse_array == 3)

	zspec = pylab.array(zspec)
	zphot = pylab.array(zphot)
	lmass = pylab.array(lmass)
	zlss = pylab.array(zlss)

	dzspec = (zspec - zlss) / (1 + zspec)
	dzphot = (zphot - zlss) / (1 + zphot)


	###  binning by mass
	digi_truepos = pylab.digitize(lmass[inds_truepos], lmassbins)
	digi_trueneg = pylab.digitize(lmass[inds_trueneg], lmassbins)
	digi_falsepos = pylab.digitize(lmass[inds_falsepos], lmassbins)
	digi_falseneg = pylab.digitize(lmass[inds_falseneg], lmassbins)

	bincount_truepos = pylab.bincount(digi_truepos, minlength=len(lmassbins)+1)[1:-1]
	bincount_trueneg = pylab.bincount(digi_trueneg, minlength=len(lmassbins)+1)[1:-1]
	bincount_falsepos = pylab.bincount(digi_falsepos, minlength=len(lmassbins)+1)[1:-1]
	bincount_falseneg = pylab.bincount(digi_falseneg, minlength=len(lmassbins)+1)[1:-1]

	n_truepos[i] += bincount_truepos
	n_trueneg[i] += bincount_trueneg
	n_falsepos[i] += bincount_falsepos
	n_falseneg[i] += bincount_falseneg

print 'done with +/- 1.5 sigma_zphot'
		###  LSS members selected by zspec
		members_zspec = pylab.find((f.cat.use[f.inds_spatial] == 1) &
		                           (dr_pkpc_min[f.inds_spatial] <= 1500.) &
		                           (f.cat.z_spec[f.inds_spatial] > zlo_spec) &
		                           (f.cat.z_spec[f.inds_spatial] < zhi_spec))

		###  LSS candidates selected by zphot
		members_zphot = pylab.find((f.cat.use[f.inds_spatial] == 1) &
		                           (f.cat.z_spec[f.inds_spatial] < 0) &
		                           (dr_pkpc_min[f.inds_spatial] <= 1500.) &
		                           (f.fout.z[f.inds_spatial] > zlo_phot) &
		                           (f.fout.z[f.inds_spatial] < zhi_phot))


		###  binning galaxies by stellar mass
		digi_mass_zspec = pylab.digitize(f.fout.lmass[f.inds_spatial][members_zspec], lmassbins)
		digi_mass_zphot = pylab.digitize(f.fout.lmass[f.inds_spatial][members_zphot], lmassbins)

		ngal_bins_zspec = pylab.bincount(digi_mass_zspec, minlength=len(lmassbins)+1)[1:-1]
		ngal_bins_zphot = pylab.bincount(digi_mass_zphot, minlength=len(lmassbins)+1)[1:-1]
		ngal_bins_zphot_corr = ngal_bins_zphot * corr_factor
		nfinal[i] += ngal_bins_zspec + ngal_bins_zphot_corr






		###################
		###  STAR-FORMING
		###################
Example #6
galaxies = filter(lambda galaxy: galaxy.ston_I > 30.0, galaxies)

f1 = pyl.figure(1, figsize=(6, 4))
f1s1 = f1.add_subplot(111)

icd = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
icdc = pyl.asarray([galaxy.ICD_IH_cored * 100 for galaxy in galaxies])
y = (icd - icdc) / icd

f1s1.scatter(icd, y, c="0.8", edgecolor="0.8", s=25, label="Data")

f1s1.axhline(0.0, c="k", lw=1)

bins = pyl.linspace(icd.min(), icd.max(), 10)
delta = bins[1] - bins[0]
idx = pyl.digitize(icd, bins)
running_median = [pyl.median(y[idx == k]) for k in range(10)]
pyl.plot(bins - delta / 2, running_median, c="#A60628", lw=4, label="Median")

f1s1.set_xlim(-5, 50)
f1s1.set_ylim(-1, 1)

f1s1.set_xlabel(r"$\xi[i_{775},H_{160}]$ (%)")
# f1s1.set_ylabel(r'$(\xi[i_{775},H_{160}]_{out} -\xi[i_{775},H_{160}])\xi[i_{775},H_{160}]$')

f1s1.set_ylabel("Fractional Change")

pyl.legend(loc="upper right")

pyl.tight_layout()
pyl.show()
Example #7
File: sfrratio.py  Project: boada/ICD
galaxies = pickle.load(open("galaxies.pickle", "rb"))
galaxies = filter(lambda galaxy: galaxy.sfrir != None and galaxy.ston_I > 30.0, galaxies)

f = pyl.figure(1)
f1 = f.add_subplot(311)
f2 = f.add_subplot(312)
f3 = f.add_subplot(313)

for galaxy in galaxies:
    f1.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800, c="0.8", edgecolor="0.8", s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
running = [pyl.median(y[idx == k]) for k in range(11)]
f1.plot(bins - delta / 2, running, "r--", lw=4)

galaxies = filter(lambda galaxy: 10 < galaxy.Mass < 11, galaxies)

for galaxy in galaxies:
    f2.scatter(galaxy.ICD_IH * 100, galaxy.sfrtotal / galaxy.sfr2800, c="0.8", edgecolor="0.8", s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
Example #8
                                           range=([ax2[0],
                                                   ax2[1]], [ax2[2], ax2[3]]))
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
asdf = sp2.imshow(pylab.log10(hist2d.T + 1),
                  extent=extent,
                  interpolation='nearest',
                  cmap=pylab.cm.Greens)
asdf.set_clim(0, pylab.log10(hist2d.max()) * 1.)
sp2.set_aspect('auto')

lmassbins = pylab.arange(8.75, 11.8, 0.25)
lmassbars = (lmassbins[1:] + lmassbins[:-1]) / 2
zbins = pylab.arange(0.5, 1.4, 0.1)
zbars = (zbins[1:] + zbins[:-1]) / 2

digi_lmass = pylab.digitize(lmass_afta_1d, lmassbins)
digi_z = pylab.digitize(z_1d, zbins)

dm_medians = []
dm_nmads = []
z_medians = []
z_nmads = []

for dmi in range(1, len(lmassbins)):
    inds = pylab.find(digi_lmass == dmi)
    dm_medians.append(pylab.median(lmass_b4_1d[inds] - lmass_afta_1d[inds]))
    dm_nmads.append(mypy.nmad(lmass_b4_1d[inds] - lmass_afta_1d[inds]))

for zi in range(1, len(zbins)):
    inds = pylab.find(digi_z == zi)
    z_medians.append(pylab.median(lmass_b4_1d[inds] - lmass_afta_1d[inds]))
Example #9
            columns={
                'quantity': func + 'Quantity',
                'price': func + 'Price',
                'priceAll': func + 'PriceAll'
            }).fillna(0))
    numFeatures += [func + 'Quantity', func + 'Price', func + 'PriceAll']

newR = newR.join(
    R.groupby('id').agg({
        'description': lambda x: ' '.join(x.values.astype(str))
    }).rename(columns={'description': 'resource_description'}))

T = T.join(newR, on='id')

# if you visit the donors website, it has categorized the price by these bins:
T['price_category'] = pl.digitize(T.priceAll,
                                  [0, 50, 100, 250, 500, 1000, pl.inf])
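# For illustration (example values, not from the dataset):
# pl.digitize([25., 75., 600.], [0, 50, 100, 250, 500, 1000, pl.inf])
# returns array([1, 2, 5]), i.e. the 1-based index of the bin each price falls in.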
numFeatures.append('price_category')
# the difference of max and min of price and quantity per item can also be relevant
for c in ['Quantity', 'Price', 'PriceAll']:
    T['max%s_min%s' % (c, c)] = T['max%s' % c] - T['min%s' % c]
    numFeatures.append('max%s_min%s' % (c, c))

del Ttr, Tts, R, newR
gc.collect()

le = LabelEncoder()
T['teacher_id'] = le.fit_transform(T['teacher_id'])
T['teacher_gender_unknown'] = T.teacher_prefix.apply(
    lambda x: int(x not in ['Ms.', 'Mrs.', 'Mr.']))
numFeatures += [
    'teacher_number_of_previously_posted_projects', 'teacher_id',
###  calculating MFs
dm = 0.25
lmassbins = pylab.arange(9.5 - dm / 2., 11.5 + dm, dm)
lmassbars = (lmassbins[1:] + lmassbins[:-1]) / 2.

xlo, xhi = 25. * 100. / dx_map, 100 - 25. * 100. / dx_map
ylo, yhi = 25. * 100. / dy_map, 100 - 25. * 100. / dy_map

inds0 = pylab.find((overdens_arr > 0.0) & (overdens_arr < 0.5)
                   & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi)
                   & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi))
inds1 = pylab.find((overdens_arr > 0.5) & (overdens_arr < 1.0)
                   & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi)
                   & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi))
inds2 = pylab.find((overdens_arr > 1.0) & (overdens_arr < 1.5)
                   & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi)
                   & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi))
inds3 = pylab.find((overdens_arr > 1.5) & (overdens_arr < 2.0)
                   & (simdata.x_cMpc > xlo) & (simdata.x_cMpc < xhi)
                   & (simdata.y_cMpc > ylo) & (simdata.y_cMpc < yhi))

digi0 = pylab.digitize(pylab.log10(simdata.stellarMass[inds0]), lmassbins)
digi1 = pylab.digitize(pylab.log10(simdata.stellarMass[inds1]), lmassbins)
digi2 = pylab.digitize(pylab.log10(simdata.stellarMass[inds2]), lmassbins)
digi3 = pylab.digitize(pylab.log10(simdata.stellarMass[inds3]), lmassbins)

ngal0 = pylab.bincount(digi0, minlength=len(lmassbins) + 1)[1:-1]
ngal1 = pylab.bincount(digi1, minlength=len(lmassbins) + 1)[1:-1]
ngal2 = pylab.bincount(digi2, minlength=len(lmassbins) + 1)[1:-1]
ngal3 = pylab.bincount(digi3, minlength=len(lmassbins) + 1)[1:-1]
        subinds = pylab.find((f.cat.use[f.inds_spatial] == 1)
                             & (f.fout.z[f.inds_spatial] > zlo)
                             & (f.fout.z[f.inds_spatial] < zhi))

        subinds_massive = pylab.find((f.cat.use[f.inds_spatial] == 1)
                                     & (f.fout.z[f.inds_spatial] > zlo)
                                     & (f.fout.z[f.inds_spatial] < zhi)
                                     & (f.fout.lmass[f.inds_spatial] > 11))

        if 0.19 < zlo < 0.76:
            for si in subinds_massive:
                s[fi] += ' mugshot_%05i_%s.pdf' % (
                    f.cat.id[f.inds_spatial][si], f.version)

        digi_mass = pylab.digitize(f.fout.lmass[f.inds_spatial][subinds],
                                   massbins)

        ngal_bins = pylab.bincount(digi_mass,
                                   minlength=len(massbins) + 1)[1:-1]
        nlo_poisson, nhi_poisson = [], []
        for n in ngal_bins:
            nhi, nlo = mypy.massfunc.confidence_interval(n)
            nlo_poisson.append(nlo)
            nhi_poisson.append(nhi)
        nlo_poisson, nhi_poisson = pylab.array(nlo_poisson), pylab.array(
            nhi_poisson)

        phi_bins = ngal_bins * 1. / volume / dm

        ephi_lo = phi_bins * nlo_poisson / ngal_bins
        ephi_lo[pylab.isnan(ephi_lo)] = 0
Example #12
###########################
## Measuring temporary impact as a function of the daily rate Q/V_D
##
## Generating evenly populated bins of \pi by means of percentile
## Assigning to each metaorder the corresponding bin in df_in.fac_pi
## Evaluating the average daily rate and impact for each bin, standard deviation and counting by means of a groupby
## Fitting a power-law and a logarithmic function
## Plotting
###########################


print('Measuring temporary impact as a function of the daily rate Q/V_D ...')
n_bins_imp_pi = 30
bin_end_imp_pi = pl.percentile(df_in.pi,list(100.*pl.arange(n_bins_imp_pi+1.)/(n_bins_imp_pi)))
bin_end_imp_pi[-1] = bin_end_imp_pi[-1] + 0.00001	# fixing the last extreme of the bins
df_in['fac_pi'] = pl.digitize(df_in.pi,bin_end_imp_pi)
df_gp = df_in[['pi','imp','fac_pi']].groupby('fac_pi')
df_imp_1d = pd.concat([df_gp.mean(),df_gp.imp.std(),df_gp.imp.count()], axis=1)
df_imp_1d.columns = ['pi','imp','stdd','nn']

## Fitting temporary impact as a function of the daily rate Q/V_D
print('Fitting temporary impact as a function of the daily rate Q/V_D...')
# fitting a power-law function
ar_pl = [0., 0.3] 	# extremes of the grid of the starting points for the non-linear optimisation algorithm
br_pl = [0., 1.]	# extremes of the grid of the starting points for the non-linear optimisation algorithm
def ff_pl(x, a, b): return a * pow(x,b)
par_pl,vv_pl,chi_pl = fit_nonlin_1d_2p(ff_pl,df_imp_1d,ar_pl,br_pl)

# fitting a logarithmic function
ar_lg = [0., 0.1]	# extremes of the grid of the starting points for the non-linear optimisation algorithm
br_lg = [50., 500.]	# extremes of the grid of the starting points for the non-linear optimisation algorithm
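The excerpt relies on the project-specific helper fit_nonlin_1d_2p, whose definition is not shown. As a rough sketch of an equivalent step (reusing the ff_pl defined above and the df_imp_1d frame, but with SciPy rather than the project's own fitter), the power-law fit could look like:

from scipy.optimize import curve_fit

# df_imp_1d holds one row per bin with mean 'pi' and mean 'imp'
par_pl, cov_pl = curve_fit(ff_pl, df_imp_1d['pi'], df_imp_1d['imp'], p0=[0.1, 0.5])
a_hat, b_hat = par_pl   # fitted amplitude and exponent of the power law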
Example #13
from astLib import astStats

galaxies = pickle.load(open('galaxies.pickle', 'rb'))
galaxies = filter(lambda galaxy: galaxy.ston_I >30. and galaxy.clumps != None,
        galaxies)

f = pyl.figure(1, figsize=(6,4))
f1s1 = f.add_subplot(111)

d = [[galaxy.clumps, galaxy.ICD_IH*100] for galaxy in galaxies]
d = pyl.asarray(d)

f1s1.scatter(d[:,0], d[:,1], s=50, c='0.8', edgecolor='0.8')

bins = pyl.arange(0, 50, 5)
index = pyl.digitize(d[:,1], bins) - 1
delta = bins[1] - bins[0]
avgs = [pyl.mean(d[:,0][index==k]) for k in range(len(bins))]
#avgs = [astStats.biweightLocation(d[:,0][index==k], 6.0) for k in range(len(bins))]

#avgs = astStats.runningStatistic(d[:,1], d[:,0])
#bins = pyl.linspace(d[:,1].min(), d[:,1].max(), 10)
#delta = bins[1] - bins[0]

#f1s1.hlines(bins - delta/2., [0], avgs, lw=2, color='#A60628')
f1s1.plot(avgs, bins + delta/2., lw=2, color='#A60628')

avg=[]
for i in range(9):
    d = [galaxy.ICD_IH*100 for galaxy in galaxies if galaxy.clumps ==i]
    avg.append(astStats.biweightLocation(d, 6.0))
Example #14
File: sfrratio.py  Project: boada/ICD
f1 = f.add_subplot(311)
f2 = f.add_subplot(312)
f3 = f.add_subplot(313)

for galaxy in galaxies:
    f1.scatter(galaxy.ICD_IH * 100,
               galaxy.sfrtotal / galaxy.sfr2800,
               c='0.8',
               edgecolor='0.8',
               s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])
y = pyl.asarray([galaxy.sfrtotal / galaxy.sfr2800 for galaxy in galaxies])
bins = pyl.linspace(0, 55, 11)
idx = pyl.digitize(x, bins)
delta = bins[1] - bins[0]
running = [pyl.median(y[idx == k]) for k in range(11)]
f1.plot(bins - delta / 2, running, 'r--', lw=4)

galaxies = filter(lambda galaxy: 10 < galaxy.Mass < 11, galaxies)

for galaxy in galaxies:
    f2.scatter(galaxy.ICD_IH * 100,
               galaxy.sfrtotal / galaxy.sfr2800,
               c='0.8',
               edgecolor='0.8',
               s=50)

# now add the medians
x = pyl.asarray([galaxy.ICD_IH * 100 for galaxy in galaxies])