示例#1
0
def find_contours_2D(x_values, y_values, xbins, weights=None, c1=16, c2=84):
    """
    Find the median and the lower/upper percentile contours of y in bins of x.

    x_values = array, values used to assign each entry to an x bin (typically truth)
    y_values = array, values summarised inside each x bin (typically reconstruction)
    xbins = bin edges along the x axis (e.g. the edges output by hist2d)
    weights = optional array of per-entry weights; when given, the weighted
        quantiles of the wquantiles package are used instead of numpy.percentile
    c1 = percentage for lower contour bound (16% - 84% means a 68% band, so c1 = 16)
    c2 = percentage for upper contour bound (16% - 84% means a 68% band, so c2 = 84)
    Returns:
        x = centers of the x bins (one value per bin)
        y_median = median of the y values in each bin
        y_lower = lower (c1) contour of the y values in each bin
        y_upper = upper (c2) contour of the y values in each bin
        Bins that contain no entries are reported as NaN in the three y arrays.
    """
    if weights is not None:
        # Imported lazily so wquantiles is only required for weighted input.
        import wquantiles as wq
        # Boolean-mask indexing below needs an array, not a plain list.
        weights = numpy.asarray(weights)
    y_values = numpy.asarray(y_values)
    # Edge arithmetic for the bin centers needs an array (lists would concatenate).
    xbins = numpy.asarray(xbins)
    indices = numpy.digitize(x_values, xbins)

    nbins = max(len(xbins) - 1, 0)
    # Pre-filled with NaN so empty bins need no special handling in the loop.
    median = numpy.full(nbins, numpy.nan)
    lower = numpy.full(nbins, numpy.nan)
    upper = numpy.full(nbins, numpy.nan)

    # numpy.digitize labels the bin between xbins[i - 1] and xbins[i] with i.
    for i in range(1, len(xbins)):
        mask = indices == i
        if not mask.any():
            continue
        if weights is None:
            r1, m, r2 = numpy.percentile(y_values[mask], [c1, 50, c2])
        else:
            # wquantiles works with fractions in [0, 1], not percentages.
            r1 = wq.quantile(y_values[mask], weights[mask], c1 / 100.)
            r2 = wq.quantile(y_values[mask], weights[mask], c2 / 100.)
            m = wq.median(y_values[mask], weights[mask])
        lower[i - 1] = r1
        median[i - 1] = m
        upper[i - 1] = r2

    # Bin centers: midpoint of each pair of consecutive edges. The float
    # divisor keeps integer edges from being floor-divided.
    return (xbins[1:] + xbins[:-1]) / 2., median, lower, upper
示例#2
0
 def weighted_cuts(self, col, weight, nbin):
     """Compute weighted quantile cut points of ``col`` and start labelling them.

     The cut points are ``wquantiles.quantile(col, weight, x / nbin)`` for
     ``x`` in ``range(nbin)``; they are then formatted as "(lo,hi]" interval
     strings. The visible snippet ends inside the labelling loop, so no
     return statement is shown here.

     NOTE(review): ``var`` is not defined anywhere in the visible code, and
     ``inds`` is not used in the visible part -- confirm against the full
     source.
     NOTE(review): the first requested quantile is 0 (``x`` starts at 0), and
     ``x / nbin`` would be integer division under Python 2 -- confirm the
     intended interpreter and cut points.
     """
     wtd_cuts = [
         wquantiles.quantile(col, weight, x / nbin) for x in range(nbin)
     ]
     # First label is open towards -inf and closed at the first cut point.
     wtd_cuts_str = ["(-inf,%s]" % wtd_cuts[0]]
     # Insertion position of each self[var] value within the cut points.
     inds = np.searchsorted(wtd_cuts, self[var])
     # One "(previous,current]" label per interior cut point; the last cut
     # point is not reached by this range.
     for i in range(1, len(wtd_cuts) - 1):
         wtd_cuts_str.append("(%s,%s]" % (wtd_cuts[i - 1], wtd_cuts[i]))
示例#3
0
def estimate_age(name, sex):
    """Return a dict with ``name``, ``sex`` and the weighted age quartiles.

    The ages come from ``age_alive_dead(name, sex)`` and are weighted by the
    ``n_alive`` column of that result. The quartiles are stored under the
    keys ``q25``, ``q50`` and ``q75``.
    """
    records = age_alive_dead(name, sex)
    summary = {'name': name, 'sex': sex}
    for pct in (25, 50, 75):
        # Percent -> fraction for the weighted quantile helper.
        summary['q' + str(pct)] = quantile(records.age, records.n_alive,
                                           pct / 100)
    return summary
示例#4
0
def estimate_age(name, sex):
    """Return a ``pd.Series`` with age quartiles and survival share for a name.

    Birth years from ``get_data(name, sex)`` are weighted by ``n_alive``; each
    weighted year quantile is truncated to an int and subtracted from 2016.
    The pairing is reversed (year quantile 0.75 fills ``q25``) because a later
    birth year means a lower age. ``p_alive`` is the share of births still
    alive, as a percentage rounded to two decimals.
    """
    records = get_data(name, sex)

    summary = {}
    for label, year_quantile in (('q25', 0.75), ('q50', 0.5), ('q75', 0.25)):
        birth_year = int(quantile(records.year, records.n_alive, year_quantile))
        summary[label] = 2016 - birth_year

    alive_share = records.n_alive.sum() / records.births.sum()
    summary['p_alive'] = round(alive_share * 100, 2)
    summary['sex'] = sex
    summary['name'] = name
    return pd.Series(summary)
示例#5
0
 def test_weighted_median_3D(self):
     # The 0.5 weighted quantile of the 3D fixture must match the reference
     # table below to numpy.testing's default "almost equal" precision.
     expected = np.array([
         [43.66666667, 91., 35., 50., 23.],
         [30.66666667, 89., 75.66666667, 6., 48.33333333],
         [49., 54.66666667, 16.33333333, 89.33333333, 26.33333333],
         [63., 34., 37.33333333, 57.66666667, 82.],
         [34., 32., 29., 37., 51.33333333],
     ])
     computed = quantile(self.a3D, self.aw, 0.5)
     nptest.assert_array_almost_equal(computed, expected)
示例#6
0
def weighted_cuts(col, weight, nbin):
    """Label every entry of ``col`` with its weighted-quantile interval.

    The ``nbin - 1`` interior cut points are the weighted quantiles of ``col``
    at k / nbin for k = 1 .. nbin - 1. Each entry is then mapped to one of
    the strings "(-inf,c1]", "(c1,c2]", ..., "(cN,inf]" and the array of
    labels is returned.
    """
    fractions = [float(k) / float(nbin) for k in range(1, nbin)]
    edges = [wquantiles.quantile(col, weight, frac) for frac in fractions]

    labels = ["(-inf,%s]" % edges[0]]
    labels.extend("(%s,%s]" % (low, high)
                  for low, high in zip(edges[:-1], edges[1:]))
    labels.append("(%s,inf]" % edges[-1])

    # searchsorted gives, for each value, the index of the interval it falls in.
    positions = np.searchsorted(edges, col)
    return np.take(labels, positions)
示例#7
0
def running_quantiles_loess(x, y, quantiles, x_result=None, npoints=50):
    """Evaluate locally weighted quantiles of ``y`` along ``x``.

    For every evaluation point the weights returned by
    ``loess_weights(point, x)`` are printed and then used to compute each
    requested weighted quantile of ``y``.

    When ``x_result`` is not given, ``npoints`` evenly spaced points between
    the NaN-ignoring minimum and maximum of ``x`` are used; otherwise
    ``npoints`` is replaced by ``len(x_result)``.

    Returns ``(x_result, q_result)`` where ``q_result`` has one row per
    evaluation point and one column per entry of ``quantiles``.
    """
    if x_result is not None:
        npoints = len(x_result)
    else:
        x_result = np.linspace(np.nanmin(x), np.nanmax(x), npoints)

    n_quantiles = len(quantiles)
    q_result = np.zeros((npoints, n_quantiles))

    for row in range(npoints):
        local_weights = loess_weights(x_result[row], x)
        print(local_weights)
        for col in range(n_quantiles):
            q_result[row, col] = wquantiles.quantile(y, local_weights,
                                                     quantiles[col])
    return (x_result, q_result)
示例#8
0
 def fu(x):
     """Collapse one group of ad rows ``x`` into a single summary ``pd.Series``.

     Produces the maximum date and active_seller flag, the number of ads, the
     sales-weighted mode of category_id, sums of sales / views / sold
     quantity, the sales-weighted mean ad type, sales-weighted medians of
     price and position, the gini coefficient of daily revenues, the
     conversion rate (sales / views) and the revenue share of the market.

     Fallbacks:
       * ad_type_mean uses the plain mean when the weighted average fails
         (numpy raises when the weights sum to zero).
       * the medians use equal weights when the group has no positive sales,
         and the plain median of the column when the weighted result is NaN.
       * conversion is 0 without views; share is 0 when it evaluates to NaN.
     """
     sales = x['daily_sales']
     sales_weights = np.nan_to_num(sales)
     has_sales = sales.max() > 0

     def sales_weighted_median(column):
         # Weight by daily sales when the group sold anything; otherwise give
         # every row the same weight.
         values = x[column]
         weights = sales if has_sales else len(values) * [1]
         result = wq.quantile(values, weights, 0.5)
         if np.isnan(result):
             # Fall back to the unweighted median of the column itself (the
             # previous code took the median of the NaN result, which stays NaN).
             result = np.median(values)
         return result

     d = {}
     d['date'] = x['date'].max()
     d['active_seller'] = x['active_seller'].max()
     d['amount_of_ads'] = x['active_seller'].count()
     d['category_id'] = sklearn.utils.extmath.weighted_mode(
         x['category_id'], sales_weights)[0].max()
     d['daily_sales_sum'] = sales.sum()
     try:
         d['ad_type_mean'] = np.average(x['ad_type_id'], weights=sales_weights)
     except Exception:
         # Best effort: e.g. all-zero weights make np.average raise.
         d['ad_type_mean'] = x['ad_type_id'].mean()
     d['daily_views_sum'] = x['daily_views'].sum()

     d['price_median'] = sales_weighted_median('price')
     # Same weighting rule as the price: the branches used to be swapped here,
     # so sales weights were applied only when there were no sales.
     d['position_median'] = sales_weighted_median('position')

     d['sold_quantity_sum'] = x['sold_quantity'].sum()
     d['gini_ads'] = gini(x['daily_revenues'].values)
     if x['daily_views'].sum() > 0:
         d['conversion'] = sales.sum() / x['daily_views'].sum()
     else:
         d['conversion'] = 0
     d['share'] = x['daily_revenues'].sum() / x['market_size'].max()
     if np.isnan(d['share']):
         d['share'] = 0
     return (pd.Series(d))
示例#9
0
def estimate_age(name, sex):
    """Return a ``pd.Series`` with the survival probability and age quartiles.

    ``p_alive`` is sum(n_alive) / sum(births) for the rows returned by
    ``get_data(name, sex)``. Ages are measured against the current calendar
    year and weighted by each row's share of ``n_alive``. The frame returned
    by ``get_data`` gains two columns, ``age`` and ``weights``.
    """
    frame = get_data(name, sex)
    total_alive = frame['n_alive'].sum()
    p_alive = total_alive / frame['births'].sum()

    frame['age'] = datetime.datetime.now().year - frame['year']
    frame['weights'] = frame['n_alive'] / total_alive

    q25, q50, q75 = (quantile(frame['age'], frame['weights'], q)
                     for q in (0.25, 0.5, 0.75))

    return pd.Series(data={
        'p_alive': p_alive,
        'q25': q25,
        'q50': q50,
        'q75': q75,
        'name': name,
        'sex': sex,
    })
def fu(x, one_hot_feature_list=None):
    """Collapse one group of ad rows ``x`` into a single summary ``pd.Series``.

    Parameters:
        x: frame-like group of ad rows.
        one_hot_feature_list: optional iterable of extra column names; the
            mean of each is added to the result. Defaults to no extra columns.

    The result holds market-level maxima, ad counts, the sales-weighted
    category mode and mean ad type, best position and lowest price, sums of
    sales / views / revenues / sold quantity, price and position summaries,
    the gini coefficient of daily revenues, conversion and revenue share.

    Fallbacks:
      * market_size_units is left out when it cannot be computed.
      * ad_type_mean uses the plain mean when the weighted average fails.
      * price_median / position_median use the unweighted median of the
        column when there are no positive sales or the weighted value is NaN.
      * conversion is 0 without views; share is 0 when it evaluates to NaN.
    """
    sales = x['daily_sales']
    sales_weights = np.nan_to_num(sales)
    has_sales = sales.max() > 0

    d = {}
    d['market_median_price'] = x['market_median_price'].max()
    d['market_size'] = x['market_size'].max()
    try:
        d['market_size_units'] = x['market_size_units'].mean()
    except Exception:
        # Best effort: the key is simply omitted when this cannot be computed.
        pass
    d['amount_of_ads'] = x['ad_id'].count()

    d['active_seller'] = x['active_seller'].max()
    d['category_id'] = sklearn.utils.extmath.weighted_mode(
        x['category_id'], sales_weights)[0].max()
    try:
        d['ad_type_mean'] = np.average(x['ad_type_id'], weights=sales_weights)
    except Exception:
        # Best effort: e.g. all-zero weights make np.average raise.
        d['ad_type_mean'] = x['ad_type_id'].mean()

    # Positions are stored as 1 / log1p(position), so a smaller position
    # yields a larger value.
    d['position_max'] = 1 / np.log1p(np.min(x['position']))
    d['price_min'] = np.min(x['price'])

    d['daily_sales_sum'] = sales.sum()

    d['daily_views_sum'] = x['daily_views'].sum()
    d['daily_views_share'] = d['daily_views_sum'] / x['market_daily_views'].max()

    # NOTE(review): both "median" keys below use the 0.25 sales-weighted
    # quantile when there are sales -- confirm this is intended.
    if has_sales:
        d['price_median'] = wq.quantile(x['price'], sales, 0.25)
    else:
        d['price_median'] = np.median(x['price'])
    if np.isnan(d['price_median']):
        d['price_median'] = np.median(x['price'])

    try:
        d['relative_price'] = d['price_median'] / x['market_median_price'].max()
    except Exception:
        print('not ok')

    if has_sales:
        d['position_median'] = 1 / np.log1p(wq.quantile(x['position'], sales, 0.25))
    else:
        d['position_median'] = 1 / np.log1p(np.median(x['position']))
    if np.isnan(d['position_median']):
        # Recompute from the column with the same transform as above (the
        # previous code took the median of the NaN value, which stays NaN).
        d['position_median'] = 1 / np.log1p(np.median(x['position']))

    d['sold_quantity_sum'] = x['sold_quantity'].sum()
    d['gini_ads'] = gini(x['daily_revenues'].values)
    if x['daily_views'].sum() > 0:
        d['conversion'] = sales.sum() / x['daily_views'].sum()
    else:
        d['conversion'] = 0

    d['share'] = x['daily_revenues'].sum() / x['market_size'].max()
    d['daily_revenues_sum'] = x['daily_revenues'].sum()
    if np.isnan(d['share']):
        d['share'] = 0

    # None replaces the former mutable default list; behaviour is unchanged.
    for feature in (one_hot_feature_list or ()):
        d[feature] = x[feature].mean()

    return (pd.Series(d))
示例#11
0
# Quick inspection of the loaded frame `df` (created outside this fragment).
print(df.columns)
#df["Hash"] = df["Addr"].apply(lambda x: (x >> 15)&0x3)

# Distinct addresses, printed as an array and then in binary, one per line.
addresses = df["address"].unique()
print(addresses)
print(*[bin(a) for a in addresses], sep='\n')

print(df.head())

print(df["hash"].unique())

min_time = df["time"].min()
max_time = df["time"].max()

# Weighted 10% / 90% quantiles of "time", one per column listed in
# sample_flush_columns; each such column supplies the weights.
q10s = [wq.quantile(df["time"], df[col], 0.1) for col in sample_flush_columns]
q90s = [wq.quantile(df["time"], df[col], 0.9) for col in sample_flush_columns]

# Graph window as multiples of 10 around the extreme quantiles.
# NOTE(review): the upper bound adds 19 before flooring (the commented-out
# variant below adds 9) -- confirm the extra head-room is intended.
graph_upper = int(((max(q90s) + 19) // 10) * 10)
graph_lower = int(((min(q10s) - 10) // 10) * 10)
# graph_lower = (min_time // 10) * 10
# graph_upper = ((max_time + 9) // 10) * 10

print("graphing between {}, {}".format(graph_lower, graph_upper))

# Rows whose main_core is 0.
df_main_core_0 = df[df["main_core"] == 0]
#df_helper_core_0 = df[df["helper_core"] == 0]

colours = ["b", "r", "g", "y"]

示例#12
0
 # Fragment of a loop body (Python 2 print statements); `i`, `propagated`,
 # `weights`, `Zarr` and the result lists are defined outside this view.
 # Select the rows of `propagated` whose column 0 lies strictly between
 # comoving_distance(Zarr[i]) and comoving_distance(Zarr[i + 1]), and the
 # matching weights.
 slicepropselect = propagated[
     (propagated.transpose()[0] > comoving_distance(Zarr[i])) *
     (propagated.transpose()[0] < comoving_distance(Zarr[i + 1]))]
 sliceweights = weights[
     (propagated.transpose()[0] > comoving_distance(Zarr[i])) *
     (propagated.transpose()[0] < comoving_distance(Zarr[i + 1]))]
 # Weighted 2D histogram of columns 1 and 2 of the slice, normalised so the
 # bin contents sum to 1.
 histazslice = np.histogram2d(slicepropselect.transpose()[1],
                              slicepropselect.transpose()[2],
                              weights=sliceweights,
                              bins=[27, 56],
                              range=[[0, 27], [1, 57]])
 histazslice = histazslice[0] / np.sum(histazslice[0])
 comphistlist.append(histazslice)
 # Despite the "med" prefix this is the weighted mean of column 3.
 medenergy = np.sum(slicepropselect.transpose()[3] *
                    sliceweights) / np.sum(sliceweights)
 # Weighted 10% / 50% / 90% quantiles of column 3.
 # NOTE(review): energy1/energy5/energy9 are not used in the visible lines.
 energy1 = quantile(slicepropselect.transpose()[3], sliceweights, 0.1)
 energy5 = quantile(slicepropselect.transpose()[3], sliceweights, 0.5)
 energy9 = quantile(slicepropselect.transpose()[3], sliceweights, 0.9)
 # Weighted mean of column 1, then its weighted 10% / 50% / 90% quantiles.
 medatonum = np.sum(slicepropselect.transpose()[1] *
                    sliceweights) / np.sum(sliceweights)
 atonu1 = quantile(slicepropselect.transpose()[1], sliceweights, 0.1)
 atonu5 = quantile(slicepropselect.transpose()[1], sliceweights, 0.5)
 atonu9 = quantile(slicepropselect.transpose()[1], sliceweights, 0.9)
 print 'Mean Energy from this slice', medenergy
 #print 'Composition from this slice', histazslice
 print 'Mean atomic number from this slice', medatonum
 # Collect the per-slice results.
 medano.append(medatonum)
 ano1.append(atonu1)
 ano5.append(atonu5)
 ano9.append(atonu9)
 meden.append(medenergy)
示例#13
0
            # Last statement of a loop nest whose headers are outside this
            # view: a leaf gets depth 0.0, any other node gets the mean of
            # the distMat entries selected by nodeLeafPairs[y][z].
            depths[x,y,z] = distMat[nodeLeafPairs[y][z]].mean() if not nodes_all[y][z].is_leaf() else 0.0


#scale depths by dividing by the depth of the root
#the first node in each topo is the root, as the traversal goes to the root first
depths = depths / np.repeat(depths[:,:,0,np.newaxis], depths.shape[2], axis=2)
#anywhere we have nan is where the root depth was zero. This happens where we had missing data. So we can set all these tree depths to zero.
#NOTE(review): np.nan_to_num also turns +/-inf into very large finite numbers; confirm a non-zero depth can never be divided by a zero root depth.
depths = np.nan_to_num(depths)


#weighted mean over axis 0; the 2D weights are repeated along the last axis to match the shape of depths
depths_average = np.average(depths, axis = 0, weights=np.repeat(weights[:,:,np.newaxis], depths.shape[2], axis=2))

#weighted median over axis 0 for every (j, k) pair, as a nested list indexed [j][k]
depths_median = [[wquantiles.median(depths[:,j,k], weights=weights[:,j]) for k in range(depths.shape[2])] for j in range(depths.shape[1])]

#optional lower / upper weighted quantiles, taken from the first two entries of args.quantiles
if args.quantiles:
    depths_qL = [[wquantiles.quantile(depths[:,j,k], weights[:,j], args.quantiles[0]) for k in range(depths.shape[2])] for j in range(depths.shape[1])]
    depths_qU = [[wquantiles.quantile(depths[:,j,k], weights[:,j], args.quantiles[1]) for k in range(depths.shape[2])] for j in range(depths.shape[1])]


#cols = np.array([
    #"#2BCE48", #Green
    #"#005C31", #Forest
    #"#94FFB5", #Jade
    #"#9DCC00", #Lime
    #"#426600", #Quagmire
    #"#00998F", #Turquoise
    #"#5EF1F2", #Sky
    #"#0075DC", #Blue
    #"#003380", #Navy
    #"#740AFF", #Violet
    #"#FF5005", #Zinnia
示例#14
0
# Replace NaN entries of depths with 0 before summarising.
# NOTE(review): np.nan_to_num also turns +/-inf into very large finite
# numbers; confirm depths cannot contain infinities at this point.
depths = np.nan_to_num(depths)

# Weighted mean over axis 0; the 2D weights are repeated along the last axis
# to match the shape of depths.
depths_average = np.average(depths,
                            axis=0,
                            weights=np.repeat(weights[:, :, np.newaxis],
                                              depths.shape[2],
                                              axis=2))

# Weighted median over axis 0 for every (j, k) pair, as a nested list
# indexed [j][k].
depths_median = [[
    wquantiles.median(depths[:, j, k], weights=weights[:, j])
    for k in range(depths.shape[2])
] for j in range(depths.shape[1])]

# Optional lower / upper weighted quantiles, taken from the first two entries
# of args.quantiles.
if args.quantiles:
    depths_qL = [[
        wquantiles.quantile(depths[:, j, k], weights[:, j], args.quantiles[0])
        for k in range(depths.shape[2])
    ] for j in range(depths.shape[1])]
    depths_qU = [[
        wquantiles.quantile(depths[:, j, k], weights[:, j], args.quantiles[1])
        for k in range(depths.shape[2])
    ] for j in range(depths.shape[1])]

#cols = np.array([
#"#2BCE48", #Green
#"#005C31", #Forest
#"#94FFB5", #Jade
#"#9DCC00", #Lime
#"#426600", #Quagmire
#"#00998F", #Turquoise
#"#5EF1F2", #Sky
示例#15
0
def _percentile_band(values, weights, lower_percentile, upper_percentile):
    """Return (lower, median, upper) of one slice; percentiles are given in 0-100."""
    if weights is None:
        lower, median, upper = numpy.percentile(
            values, [lower_percentile, 50., upper_percentile])
        return lower, median, upper
    # Imported lazily so wquantiles is only required for weighted input.
    import wquantiles as wq
    # wquantiles expects fractions in [0, 1]; passing percentages raises.
    lower = wq.quantile(values, weights, lower_percentile / 100.)
    upper = wq.quantile(values, weights, upper_percentile / 100.)
    median = wq.median(values, weights)
    return lower, median, upper


def plot_1d_binned_slices(truth, reco1, reco2=None,
                          xarray1=None, xarray2=None, truth2=None,
                          plot_resolution=False, use_fraction=False,
                          bins=10, xmin=-1., xmax=1, style="contours",
                          x_name="Zenith", x_units="",
                          y_units=None,
                          reco1_name="Reco 1", reco2_name="Reco 2",
                          reco1_weight=None, reco2_weight=None,
                          save=True, savefolder='.'):
    """Plot the median and central 68.27% band of a reco variable in x slices.

    Receives:
        truth = 1D array with truth values
        reco1 = 1D array that has reconstructed results
        reco2 = optional, 1D array with an alternate reconstruction
        xarray1 = optional, 1D array used to slice reco1 (or its resolution);
            if None, truth is used
        xarray2 = optional, 1D array used to slice reco2 (or its resolution);
            if None, xarray1 is used
        truth2 = optional, 1D array with truth values for reco2's resolution;
            if None, truth is used
        plot_resolution = use resolution (reco - truth) instead of the
            reconstructed values themselves
        use_fraction = bool, use fractional resolution (reco - truth)/truth;
            only valid together with plot_resolution
        bins = number of slice edges between xmin and xmax (numpy.linspace
            num), so bins - 1 slices are plotted
        xmin = lowest slice edge (default = -1.)
        xmax = highest slice edge (default = 1.); slices are half-open, so
            entries equal to xmax are not included
        style = "errorbars" selects the error-bar version, anything else
            gives the contour (line plus shaded band) version
        x_name = variable name for the x axis
        x_units = units for the x axis variable
        y_units = optional units for the resolution y label (x_units if None)
        reco1_name = name for reconstruction 1
        reco2_name = name for reconstruction 2
        reco1_weight = 1D array of weights for reco1, if None unweighted
        reco2_weight = 1D array of weights for reco2, if None unweighted
        save = bool, save the figure as a png in savefolder
        savefolder = folder the png is written to
    Returns:
        None. Draws on a new matplotlib figure: slice centers on the x axis,
        the median per slice on the y axis, and the 68.27% band as error bars
        or shaded region.
    Raises:
        AssertionError if use_fraction is set without plot_resolution, or if
        any slice contains no events.
    """

    percentile_in_peak = 68.27  #CAN CHANGE
    left_tail_percentile = (100. - percentile_in_peak) / 2
    right_tail_percentile = 100. - left_tail_percentile
    ranges = numpy.linspace(xmin, xmax, num=bins)
    centers = (ranges[1:] + ranges[:-1]) / 2.

    # if no xarray given, automatically use truth
    if xarray1 is None:
        xarray1 = truth
    # Calculate resolution if plot_resolution flag == True
    if plot_resolution:
        if use_fraction:
            yvariable = (reco1 - truth) / truth  # in fraction
        else:
            yvariable = reco1 - truth
    else:  # use reco directly, not resolution
        assert not use_fraction, "Flag for fractional resolution only, not doing resolution here"
        yvariable = reco1

    medians = numpy.zeros(len(centers))
    err_from = numpy.zeros(len(centers))
    err_to = numpy.zeros(len(centers))

    # Compare to second reconstruction if given
    if reco2 is not None:
        # Missing reco2 inputs fall back to the reco1 ones.
        if truth2 is None:
            truth2 = truth
        if xarray2 is None:
            xarray2 = xarray1

        if plot_resolution:
            if use_fraction:
                yvariable2 = (reco2 - truth2) / truth2
            else:
                yvariable2 = reco2 - truth2
        else:
            yvariable2 = reco2
        medians2 = numpy.zeros(len(centers))
        err_from2 = numpy.zeros(len(centers))
        err_to2 = numpy.zeros(len(centers))

    # Find median and percentile bounds for data
    for i in range(len(ranges) - 1):

        # Half-open slice [var_from, var_to) on the x variable
        var_from = ranges[i]
        var_to = ranges[i + 1]
        cut = (xarray1 >= var_from) & (xarray1 < var_to)
        assert sum(
            cut
        ) > 0, "No events in xbin from %s to %s for reco1, may need to change xmin, xmax, or number of bins or check truth/xarray inputs" % (
            var_from, var_to)
        if reco2 is not None:
            cut2 = (xarray2 >= var_from) & (xarray2 < var_to)
            assert sum(
                cut2
            ) > 0, "No events in xbin from %s to %s for reco2, may need to change xmin, xmax, or number of bins or check truth2/xarray2 inputs" % (
                var_from, var_to)

        # Band of reco1 (or its resolution) in this slice
        weights1 = None if reco1_weight is None else reco1_weight[cut]
        err_from[i], medians[i], err_to[i] = _percentile_band(
            yvariable[cut], weights1, left_tail_percentile,
            right_tail_percentile)

        # Band of reco2 (or its resolution) in this slice
        if reco2 is not None:
            weights2 = None if reco2_weight is None else reco2_weight[cut2]
            err_from2[i], medians2[i], err_to2[i] = _percentile_band(
                yvariable2[cut2], weights2, left_tail_percentile,
                right_tail_percentile)

    # Make plot
    plt.figure(figsize=(10, 7))

    # Median as datapoint
    # Percentile as y error bars
    # Bin size as x error bars
    if style == "errorbars":
        plt.errorbar(centers,
                     medians,
                     yerr=[medians - err_from, err_to - medians],
                     xerr=[centers - ranges[:-1], ranges[1:] - centers],
                     capsize=5.0,
                     fmt='o',
                     label="%s" % reco1_name)
        # Compare to second reconstruction, if given
        if reco2 is not None:
            plt.errorbar(centers,
                         medians2,
                         yerr=[medians2 - err_from2, err_to2 - medians2],
                         xerr=[centers - ranges[:-1], ranges[1:] - centers],
                         capsize=5.0,
                         fmt='o',
                         label="%s" % reco2_name)
            plt.legend(loc="upper center")
    # Make contour plot
    # Center solid line is median
    # Shaded region is percentile
    # NOTE: plotted using centers, so 0th and last bins look like they stop short (by 1/2*bin_size)
    else:
        alpha = 0.5
        lwid = 3
        color = plt.get_cmap('Blues')(numpy.linspace(0, 1, 2 + 2))[2:][0]
        rcolor = plt.get_cmap('Oranges')(numpy.linspace(0, 1, 2 + 2))[2:][0]
        ax = plt.gca()
        ax.plot(centers,
                medians,
                linestyle='-',
                label="%s median" % (reco1_name),
                color=color,
                linewidth=lwid)
        ax.fill_between(centers, medians, err_from, color=color, alpha=alpha)
        ax.fill_between(centers,
                        medians,
                        err_to,
                        color=color,
                        alpha=alpha,
                        label="%s %i%%" % (reco1_name, percentile_in_peak))
        if reco2 is not None:
            ax.plot(centers,
                    medians2,
                    color=rcolor,
                    linestyle='-',
                    label="%s median" % reco2_name,
                    linewidth=lwid)
            ax.fill_between(centers,
                            medians2,
                            err_from2,
                            color=rcolor,
                            alpha=alpha)
            ax.fill_between(centers,
                            medians2,
                            err_to2,
                            color=rcolor,
                            alpha=alpha,
                            label="%s %i%%" % (reco2_name, percentile_in_peak))

    # Extra features to have a horizontal 0 line and trim the x axis
    plt.plot([xmin, xmax], [0, 0], color='k')
    plt.xlim(xmin, xmax)

    # Make pretty labels
    plt.xlabel("%s %s" % (x_name, x_units))
    if plot_resolution:
        if use_fraction:
            plt.ylabel(
                "Fractional Resolution: \n (reconstruction - truth)/truth")
        else:
            # y_units overrides x_units for the resolution label when given.
            resolution_units = x_units if y_units is None else y_units
            plt.ylabel("Resolution: \n reconstruction - truth %s" %
                       resolution_units)
    else:
        plt.ylabel("Reconstructed %s %s" % (x_name, x_units))

    # Make a pretty title
    title = "%s Dependence for %s" % (x_name, reco1_name)
    if reco2 is not None:
        title += " and %s" % (reco2_name)
    if plot_resolution:
        title += " Resolution"
    plt.title("%s" % (title))

    # Make a pretty filename
    savename = "%s" % (x_name.replace(" ", ""))
    if use_fraction:
        savename += "Frac"
    if plot_resolution:
        savename += "Resolution"
    if reco2 is not None:
        savename += "_Compare%s" % (reco2_name.replace(" ", ""))
    if save == True:
        plt.savefig("%s/%s.png" % (savefolder, savename))
示例#16
0
def plot_resolution_from_dict(truth, reco, keylist,
                              cut=None, weights=None, suptitle="Compare Vertex",
                              savefolder=None, save=False, bins=100, use_fraction=False):
    """Histogram the x, y and z resolution of several reconstructions.

    Receives:
        truth = 2D array; columns 0, 1, 2 are used as the x, y, z truth values
        reco = mapping from name to a 2D array with the same column layout
        keylist = names in reco to plot; each becomes one histogram per panel
        cut = optional boolean mask (or index array) over events; only the
            selected events are plotted and summarised. None keeps all events
        weights = optional per-event weights aligned with truth; the cut is
            applied to them as well
        suptitle = figure title; with spaces removed it is also used in the
            saved file name
        savefolder = prefix prepended directly to the file name (include a
            trailing separator); None means no prefix
        save = bool, save the figure as a png
        bins = bins argument passed to each histogram
        use_fraction = bool, plot (reco - truth) / truth instead of reco - truth
    Returns:
        None. For every variable and name a line with the number of entries,
        mean, median, RMS (from get_RMS) and the 16% / 84% quantiles is
        printed. The mean and median are unweighted; the quantiles are
        weighted when weights are given. The figure is closed before
        returning.
    """
    variables = ["x", "y", "z"]
    fig, ax = plt.subplots(1, 3, figsize=(20, 10))
    fig.suptitle(suptitle)

    if cut is None:
        # Keep every event.
        cut = np.ones(len(truth), dtype=bool)
    else:
        cut = np.asarray(cut)

    if weights is not None:
        # Imported lazily so wquantiles is only required for weighted input.
        import wquantiles as wq
        # Weights are per event, so they need the same selection as the data.
        cut_weights = np.asarray(weights)[cut]
    else:
        cut_weights = None

    for var in range(0, 3):
        if use_fraction:
            title = "Fractional %s Resolution" % variables[var]
            xlabel = "(reco - truth) / truth"
        else:
            title = "%s Resolution" % variables[var]
            xlabel = "reco - truth (m)"

        print("Resolution %s" % variables[var])
        print('Name\t Entries\t Mean\t Median\t RMS\t Percentiles\t')
        truth_values = truth[:, var][cut]
        for keyname in keylist:
            reco_values = reco[keyname][:, var][cut]
            if use_fraction:
                resolution = (reco_values - truth_values) / truth_values
            else:
                resolution = reco_values - truth_values
            ax[var].hist(resolution, bins=bins, weights=cut_weights,
                         alpha=0.5, label="%s" % keyname)

            # Statistics
            rms = get_RMS(resolution, cut_weights)
            if cut_weights is not None:
                r1 = wq.quantile(resolution, cut_weights, 0.16)
                r2 = wq.quantile(resolution, cut_weights, 0.84)
            else:
                r1, r2 = np.percentile(resolution, [16, 84])

            # Entries is a count, so it is printed as an integer.
            print("%s\t %i\t %.2f\t %.2f\t %.2f\t %.2f, %.2f\t" % (
                keyname,
                len(resolution),
                np.mean(resolution),
                np.median(resolution),
                rms,
                r1, r2))
        ax[var].set_title(title)
        ax[var].set_xlabel(xlabel)
        ax[var].legend(fontsize=20)

    sup = suptitle.replace(" ", "")
    if save:
        # A missing folder means "no prefix" rather than the text "None".
        prefix = "" if savefolder is None else savefolder
        if use_fraction:
            plt.savefig("%sFractionalVertexResolution_%s.png" % (prefix, sup), bbox_inches='tight')
        else:
            plt.savefig("%sVertexResolution_%s.png" % (prefix, sup), bbox_inches='tight')
    plt.close()