Example No. 1
def hpge_find_E_peaks(hist, bins, var, peaks_keV, n_sigma=5, deg=0, Etol_keV=None, var_zero=1, verbose=False):
    """ Find uncalibrated E peaks whose E spacing matches the pattern in peaks_keV

    Note: the specialization here to units "keV" in peaks and Etol is
    unnecessary. However it is kept so that the default value for Etol_keV has
    an unambiguous interpretation.

    Parameters
    ----------
    hist, bins, var: array, array, array
        Histogram of uncalibrated energies, see pgh.get_hist()
        var cannot contain any zero entries.
    peaks_keV : array
        Energies of peaks to search for (in keV)
    n_sigma : float
        Threshold for detecting a peak in sigma (i.e. sqrt(var))
    deg : int
        deg arg to pass to poly_match
    Etol_keV : float
        absolute tolerance in energy for matching peaks
    var_zero : float
        number used to replace zeros of var to avoid divide-by-zero in
        hist/sqrt(var). Default value is 1. Usually when var = 0 it's because
        hist = 0, and any value here is fine.
    verbose : bool
        print debugging messages

    Returns
    -------
    detected_peak_locations : list
        list of uncalibrated energies of detected peaks
    detected_peak_energies : list
        list of calibrated energies of detected peaks
    pars : list of floats
        the parameters for poly(peaks_uncal) = peaks_keV (polyfit convention)
    """
    # clean up var if necessary
    if np.any(var == 0):
        if verbose:
            print(f'hpge_find_E_peaks: replacing var zeros with {var_zero}')
        var[np.where(var == 0)] = var_zero
    peaks_keV = np.asarray(peaks_keV)

    # Find all maxes with > n_sigma significance
    imaxes = get_i_local_maxima(hist/np.sqrt(var), n_sigma)

    # Now pattern match to peaks_keV within Etol_keV using poly_match
    detected_max_locs = pgh.get_bin_centers(bins)[imaxes]

    if Etol_keV is None:
        # estimate Etol_keV
        pt_pars, pt_covs = hpge_fit_E_peak_tops(hist, bins, var, detected_max_locs, n_to_fit=15)
        cov_sum = sum(sum([sum(c) if c is not None else 0 for c in pt_covs]))
        if cov_sum == np.inf or cov_sum == 0:
            print('hpge_find_E_peaks: can safely ignore previous covariance warning, not used')
        pt_pars = pt_pars[np.array([x is not None for x in pt_pars])]
        med_sigma_ratio = np.median(np.stack(pt_pars)[:,1]/np.stack(pt_pars)[:,0])

        Etol_keV = 5. * (med_sigma_ratio / 0.003)
    pars, ixtup, iytup = poly_match(detected_max_locs, peaks_keV, deg=deg, atol=Etol_keV)

    if verbose and len(ixtup) != len(peaks_keV):
        print(f'hpge_find_E_peaks: only found {len(ixtup)} of {len(peaks_keV)} expected peaks')
    return detected_max_locs[ixtup], peaks_keV[iytup], pars
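A minimal usage sketch (illustrative, untested): it assumes the module-level helpers the function body calls (pgh, get_i_local_maxima, hpge_fit_E_peak_tops, poly_match) are available, and a hypothetical file of uncalibrated energies.

import numpy as np
import pygama.analysis.histograms as pgh

e_uncal = np.loadtxt('energies.txt')  # hypothetical uncalibrated energies
hist, bins, var = pgh.get_hist(e_uncal, bins=2000, range=(0, 8000))
peaks_keV = np.array([238.6, 583.2, 1460.8, 2614.5])  # pattern to match
locs, energies, pars = hpge_find_E_peaks(hist, bins, var, peaks_keV,
                                         n_sigma=5, deg=0, verbose=True)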
Example No. 2
def corrDCR(df,
            etype,
            e_bins=300,
            elo=0,
            ehi=6000,
            dcr_fit_lo=-30,
            dcr_fit_hi=30):
    """Fit a line to the binned median of DCR vs. energy; return (slope, offset, err)."""
    df_dcr_cut = df.query(
        f'dcr >{dcr_fit_lo} and dcr < {dcr_fit_hi} and {etype} > {elo} and {etype} < {ehi}'
    ).copy()

    median, xedges, binnumber = stats.binned_statistic(df_dcr_cut[etype],
                                                       df_dcr_cut['dcr'],
                                                       statistic="median",
                                                       bins=e_bins)

    en_bin_centers = pgh.get_bin_centers(xedges)

    fit_raw, cov = np.polyfit(en_bin_centers, median, deg=1, cov=True)

    slope = fit_raw[0]
    offset = fit_raw[1]
    err = np.sqrt(np.diag(cov))

    print(f'Fit results\n slope: {slope}\n offset: {offset}')
    return (slope, offset, err)
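A hedged usage sketch, assuming the scipy.stats and pgh imports the function body relies on; the DataFrame and the column name 'trapEftp_cal' are hypothetical:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({'trapEftp_cal': rng.uniform(0, 6000, 10000),  # hypothetical energies
                   'dcr': rng.normal(0, 5, 10000)})              # hypothetical DCR values
slope, offset, err = corrDCR(df, 'trapEftp_cal')
df['dcr_corr'] = df['dcr'] - (slope * df['trapEftp_cal'] + offset)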
Example No. 3
def corrDCR(df_cut,
            etype,
            e_bins=300,
            elo=0,
            ehi=6000,
            dcr_fit_lo=-30,
            dcr_fit_hi=30,
            quad=False,
            dcr_fit_qlo=5000,
            dcr_fit_qhi=5800):
    """Correct DCR for its energy dependence (linear, or quadratic at high E if
    quad=True); return df_cut with a new 'dcr_corr' column."""
    median, xedges, binnumber = stats.binned_statistic(
        df_cut[etype],
        df_cut['dcr'],
        statistic="median",
        bins=int((dcr_fit_hi - dcr_fit_lo) / 5),
        range=[dcr_fit_lo, dcr_fit_hi])

    en_bin_centers = pgh.get_bin_centers(xedges)

    if quad:
        qmedian, qxedges, qbinnumber = stats.binned_statistic(
            df_cut[etype],
            df_cut['dcr'],
            statistic="median",
            bins=int((dcr_fit_qhi - dcr_fit_qlo) / 5),
            range=[dcr_fit_qlo, dcr_fit_qhi])
        qen_bin_centers = pgh.get_bin_centers(qxedges)
        xen = np.concatenate((en_bin_centers, qen_bin_centers))
        ymed = np.concatenate((median, qmedian))
        # debug: print(xen); print(ymed)
        plt.plot(xen, ymed)
        fit_raw = np.polyfit(xen, ymed, deg=2)
        qconst = fit_raw[0]
        qlin = fit_raw[1]
        qoffset = fit_raw[2]
        df_cut['dcr_corr'] = df_cut['dcr'] - (qconst * (df_cut[etype]**2) +
                                              qlin * df_cut[etype] + qoffset)
    else:
        fit_raw = np.polyfit(en_bin_centers, median, deg=1)
        const = fit_raw[0]
        offset = fit_raw[1]

        df_cut['dcr_corr'] = df_cut['dcr'] - (const * (df_cut[etype]) + offset)

    return df_cut
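Unlike Example No. 2, this version applies the correction itself and returns the DataFrame. A minimal sketch, reusing the hypothetical df from the previous example:

df_corr = corrDCR(df.copy(), 'trapEftp_cal', quad=True,
                  dcr_fit_qlo=5000, dcr_fit_qhi=5800)
print(df_corr['dcr_corr'].median())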
Example No. 4
def fit_hist(func, hist, bins, var=None, guess=None,
             poissonLL=False, integral=None, method=None, bounds=None):
    """
    Do a binned fit to a histogram (nonlinear least squares).
    Can either do a Poisson log-likelihood fit (Jason's fave) or
    use curve_fit w/ an arbitrary function.

    - hist, bins, var : as in return value of pygama.histograms.get_hist()
    - guess : initial parameter guesses. Should be optional -- we can auto-guess
              for many common functions. But not yet implemented.
    - poissonLL : use Poisson stats instead of the Gaussian approximation in
                  each bin. Requires integer stats. You must use parameter
                  bounds to make sure that func does not go negative over the
                  x-range of the histogram.
    - method, bounds : options to pass to scipy.optimize.minimize

    Returns
    -------
    coeff, cov_matrix : tuple(array, matrix)
    """
    if guess is None:
        print("auto-guessing not yet implemented, you must supply a guess.")
        return None, None

    if poissonLL:
        if var is not None and not np.array_equal(var, hist):
            print("variances are not appropriate for a poisson-LL fit!")
            return None, None

        if method is None:
            method = "L-BFGS-B"

        result = minimize(neg_poisson_log_like, x0=guess,
                          args=(func, hist, bins, integral),
                          method=method, bounds=bounds)

        coeff, cov_matrix = result.x, result.hess_inv.todense()

    else:
        if var is None:
            var = hist # assume Poisson stats if variances are not provided

        # skip "okay" bins with content 0 +/- 0 to avoid div-by-0 error in curve_fit
        # if bin content is non-zero but var = 0 let the user see the warning
        zeros = (hist == 0)
        zero_errors = (var == 0)
        mask = ~(zeros & zero_errors)
        sigma = np.sqrt(var)[mask]
        hist = hist[mask]
        xvals = ph.get_bin_centers(bins)[mask]
        if bounds is None:
            bounds = (-np.inf, np.inf)

        coeff, cov_matrix = curve_fit(func, xvals, hist,
                                      p0=guess, sigma=sigma, bounds=bounds)

    return coeff, cov_matrix
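A minimal sketch of the least-squares path, using an illustrative gaussian shape function (not the library's own definition):

import numpy as np
import pygama.analysis.histograms as pgh

def gauss(x, mu, sigma, a):
    # illustrative peak shape for this sketch
    return a * np.exp(-(x - mu)**2 / (2 * sigma**2))

hist, bins, var = pgh.get_hist(np.random.normal(10, 1, 10000), bins=100, range=(5, 15))
coeff, cov = fit_hist(gauss, hist, bins, var=var, guess=(10, 1, 400))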
Example No. 5
def taylor_mode_max(hist, bins, var=None, mode_guess=None, n_bins=5, poissonLL=False):
    """ Get the max and mode of a peak based on Taylor exp near the max
    Returns the amplitude and position of a peak based on a poly fit over n_bins
    in the vicinity of the maximum of the hist (or the max near mode_guess, if provided).

    Parameters
    ----------
    hist : array-like
        The values of the histogram to be fit. Often: send in a slice around a peak
    bins : array-like
        The bin edges of the histogram to be fit
    var : array-like (optional)
        The variances of the histogram values. If not provided, square-root
        variances are assumed.
    mode_guess : float (optional)
        An x-value (not a bin index!) near which a peak is expected. The
        algorithm fits around the maximum within +/- n_bins of the guess. If not
        provided, the center of the max bin of the histogram is used.
    n_bins : int
        The number of bins (including the max bin) to be used in the fit. Also
        used for searching for a max near mode_guess

    Returns
    -------
    (maximum, mode) : tuple (float, float)
        maximum : the estimated maximum value of the peak
        mode : the estimated x-position of the maximum
    (pars, cov) : tuple (array, matrix)
        pars : 2-tuple with the parameters (mode, max) of the fit
            mode : the estimated x-position of the maximum
            maximum : the estimated maximum value of the peak
        cov : 2x2 matrix of floats
            The covariance matrix for the 2 parameters in pars

    Examples
    --------
    >>> import pygama.analysis.histograms as pgh
    >>> from numpy.random import normal
    >>> import pygama.analysis.peak_fitting as pgf
    >>> hist, bins, var = pgh.get_hist(normal(size=10000), bins=100, range=(-5,5))
    >>> pgf.taylor_mode_max(hist, bins, var, n_bins=5)
    """

    if mode_guess is not None: i_0 = ph.find_bin(mode_guess, bins)
    else: i_0 = np.argmax(hist)
    i_0 -= int(np.floor(n_bins/2))
    i_n = i_0 + n_bins
    wts = None if var is None else 1/np.sqrt(var[i_0:i_n])

    pars, cov = np.polyfit(ph.get_bin_centers(bins)[i_0:i_n], hist[i_0:i_n], 2, w=wts, cov='unscaled')
    mode = -pars[1] / 2 / pars[0]
    maximum = pars[2] - pars[0] * mode**2
    # build the jacobian to compute the output covariance matrix
    jac = np.array( [ [pars[1]/2/pars[0]**2,    -1/2/pars[0],       0],
                      [pars[1]**2/4/pars[0]**2, -pars[1]/2/pars[0], 1] ] )
    cov_jact = np.matmul(cov, jac.transpose())
    cov = np.matmul(jac, cov_jact)
    return (mode, maximum), cov
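For the fitted parabola y = a x^2 + b x + c, mode = -b/(2a) and maximum = c - b^2/(4a); the 2x3 jacobian above propagates the polyfit covariance onto those two quantities. Continuing the docstring example, the 1-sigma uncertainties come from the diagonal of cov:

(mode, maximum), cov = pgf.taylor_mode_max(hist, bins, var, n_bins=5)
mode_err, max_err = np.sqrt(np.diag(cov))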
Example No. 6
def get_bin_estimates(pars, func, hist, bins, integral=None, **kwargs):
    """
    Bin expected means are estimated by f(bin_center)*bin_width. Supply an
    integrating function to compute the integral over the bin instead.
    TODO: make default integrating function a numerical method that is off by
    default.
    """
    if integral is None:
        return func(ph.get_bin_centers(bins), *pars, **kwargs) * ph.get_bin_widths(bins)
    else:
        return integral(bins[1:], *pars, **kwargs) - integral(bins[:-1], *pars, **kwargs)
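A sketch of the integral path, with hist and bins as in the earlier sketches; func is unused when integral is supplied, so None is passed. The antiderivative here is hypothetical:

from scipy.stats import norm

def gauss_integral(x, mu, sigma, area):
    # hypothetical antiderivative: area times the gaussian CDF
    return area * norm.cdf(x, mu, sigma)

mu_i = get_bin_estimates((10.0, 1.0, 500.0), None, hist, bins, integral=gauss_integral)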
Example No. 7
def goodness_of_fit(hist, bins, var, func, pars, method='var'):
    """ Compute chisq and dof of fit

    Parameters
    ----------
    hist, bins, var : array, array, array or None
        histogram data. var can be None if hist is integer counts
    func : function
        the function that was fit to the hist
    pars : array
        the best-fit pars of func. Assumes all pars are free parameters
    method : str
        Sets the choice of "denominator" in the chi2 sum
        'var': user passes in the variances in var (must not have zeros)
        'Pearson': use func (hist must contain integer counts)
        'Neyman': use hist (hist must contain integer counts and no zeros)

    Returns
    -------
    chisq : float
        the summed value of chi-squared
    dof : int
        the number of degrees of freedom
    """
    # arg checks
    if method == 'var':
        if var is None:
            print("goodness_of_fit: var must be non-None to use method 'var'")
            return 0, 0
        if np.any(var == 0):
            print("goodness_of_fit: var cannot contain zeros")
            return 0, 0
    if method == 'Neyman' and np.any(hist == 0):
        print("goodness_of_fit: hist cannot contain zeros for Neyman method")
        return 0, 0

    # compute chi2 numerator and denominator
    yy = func(ph.get_bin_centers(bins), *pars)
    numerator = (hist - yy)**2
    if method == 'var':
        denominator = var
    elif method == 'Pearson':
        denominator = yy
    elif method == 'Neyman':
        denominator = hist
    else:
        print(f"goodness_of_fit: unknown method {method}")
        return 0, 0

    # compute chi2 and dof
    chisq = np.sum(numerator / denominator)
    dof = len(hist) - len(pars)
    return chisq, dof
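Continuing the fit_hist sketch from Example No. 4: with method='Pearson' the denominator is the model prediction, so var can be None (hist must hold integer counts):

chisq, dof = goodness_of_fit(hist, bins, None, gauss, coeff, method='Pearson')
print(f'chi2/dof = {chisq/dof:.2f}')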
Example No. 8
def get_most_prominent_peaks(energySeries,
                             xlo,
                             xhi,
                             xpb,
                             max_num_peaks=np.inf,
                             test=False):
    """
    find the most prominent peaks in a spectrum by looking for spikes in derivative of spectrum
    energySeries: array of measured energies
    max_num_peaks = maximum number of most prominent peaks to find
    return a histogram around the most prominent peak in a spectrum of a given percentage of width
    """
    nb = int((xhi - xlo) / xpb)
    hist, bin_edges = np.histogram(energySeries, range=(xlo, xhi), bins=nb)
    bin_centers = get_bin_centers(bin_edges)

    # median filter along the spectrum, do this as a "baseline subtraction"
    hist_med = medfilt(hist, 21)
    hist = hist - hist_med

    # identify peaks with a scipy function (could be improved ...)
    peak_idxs = find_peaks_cwt(hist, np.arange(1, 6, 0.1), min_snr=5)
    peak_energies = bin_centers[peak_idxs]

    # pick the num_peaks most prominent peaks
    if max_num_peaks < len(peak_energies):
        peak_vals = hist[peak_idxs]
        sort_idxs = np.argsort(peak_vals)
        peak_idxs_max = peak_idxs[sort_idxs[-max_num_peaks:]]
        peak_energies = np.sort(bin_centers[peak_idxs_max])

    if test:
        plt.plot(bin_centers, hist, ds='steps', lw=1, c='b')
        for e in peak_energies:
            plt.axvline(e, color="r", lw=1, alpha=0.6)
        plt.xlabel("Energy [uncal]", ha='right', x=1)
        plt.ylabel("Filtered Spectrum", ha='right', y=1)
        plt.tight_layout()
        plt.show()
        exit()

    return peak_energies
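A minimal call, assuming medfilt and find_peaks_cwt come from scipy.signal as the body implies, and reusing the hypothetical e_uncal array from Example No. 1:

peak_energies = get_most_prominent_peaks(e_uncal, xlo=0, xhi=8000, xpb=1,
                                         max_num_peaks=10)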
Example No. 9
def gauss_mode_width_max(hist,
                         bins,
                         var=None,
                         mode_guess=None,
                         n_bins=5,
                         poissonLL=False,
                         inflate_errors=False,
                         gof_method='var'):
    """
    Get the max, mode, and width of a peak based on gauss fit near the max

    Returns the parameters of a gaussian fit over n_bins in the vicinity of the
    maximum of the hist (or the max near mode_guess, if provided). This is
    equivalent to a Taylor expansion around the peak maximum because near its
    maximum a Gaussian can be approximated by a 2nd-order polynomial in x:

    A exp[ -(x-mu)^2 / 2 sigma^2 ] ~= A [ 1 - (x-mu)^2 / 2 sigma^2 ]
                                    = A - (1/2!) (A/sigma^2) (x-mu)^2

    The advantage of using a gaussian over a polynomial directly is that the
    gaussian parameters are the ones we care about most for a peak, whereas for
    a poly we would have to extract them after the fit, accounting for
    covariances. The gaussian also better approximates most peaks farther down
    the peak. However, the gauss fit is nonlinear and thus less stable.

    Parameters
    ----------
    hist : array-like
        The values of the histogram to be fit
    bins : array-like
        The bin edges of the histogram to be fit
    var : array-like (optional)
        The variances of the histogram values. If not provided, square-root
        variances are assumed.
    mode_guess : float (optional)
        An x-value (not a bin index!) near which a peak is expected. The
        algorithm fits around the maximum within +/- n_bins of the guess. If not
        provided, the center of the max bin of the histogram is used.
    n_bins : int (optional)
        The number of bins (including the max bin) to be used in the fit. Also
        used for searching for a max near mode_guess
    poissonLL : bool (optional)
        Flag passed to fit_hist()
    inflate_errors : bool (optional)
        If true, the parameter uncertainties are inflated by sqrt(chi2red)
        if it is greater than 1
    gof_method : str (optional)
        method flag for goodness_of_fit

    Returns
    -------
    (pars, cov) : tuple (array, matrix)
        pars : 3-tuple containing the parameters (mode, sigma, maximum) of the
               gaussian fit
            mode : the estimated x-position of the maximum
            sigma : the estimated width of the peak. Equivalent to a gaussian
                width (sigma), but based only on the curvature within n_bins of
                the peak. Note that the Taylor-approximated curvature of the
                underlying function in the vicinity of the max is given by max /
                sigma^2
            maximum : the estimated maximum value of the peak
        cov : 3x3 matrix of floats
            The covariance matrix for the 3 parameters in pars
    """

    bin_centers = ph.get_bin_centers(bins)
    if mode_guess is not None: i_0 = ph.find_bin(mode_guess, bins)
    else:
        i_0 = np.argmax(hist)
        mode_guess = bin_centers[i_0]
    amp_guess = hist[i_0]
    i_0 -= int(np.floor(n_bins / 2))
    i_n = i_0 + n_bins
    width_guess = (bin_centers[i_n] - bin_centers[i_0])
    vv = None if var is None else var[i_0:i_n]
    guess = (mode_guess, width_guess, amp_guess)
    try:
        pars, cov = fit_hist(gauss_basic,
                             hist[i_0:i_n],
                             bins[i_0:i_n + 1],
                             vv,
                             guess=guess,
                             poissonLL=poissonLL)
    except:
        return None, None
    if pars[1] < 0: pars[1] = -pars[1]
    if inflate_errors:
        chi2, dof = goodness_of_fit(hist, bins, var, gauss_basic, pars)
        if chi2 > dof: cov *= chi2 / dof
    return pars, cov
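A minimal sketch, with hist, bins, var windowed around a peak as in the earlier sketches; pars is None if the underlying fit failed, so check before unpacking:

pars, cov = gauss_mode_width_max(hist, bins, var, n_bins=7, inflate_errors=True)
if pars is not None:
    mode, sigma, maximum = pars
    mode_err = np.sqrt(cov[0, 0])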
Example No. 10
def n_minus_1(run, campaign, df, dg, runtype, rt_min, radius, angle_det, rotary, cut_keys):
    """Make N-1 diagnostic plots: apply all cuts except one, for each cut in cut_keys."""
    with open('./cuts.json') as f:
        cuts = json.load(f)
    
    e_res_const = [cuts[str(run)][f'e_res_const{i}'] for i in range(3)]
        
    bl_cut_lo_raw = cuts[str(run)]['bl_cut_lo_raw']
    bl_cut_hi_raw = cuts[str(run)]['bl_cut_hi_raw'] 
    bl_slope_lo_raw = cuts[str(run)]['bl_slope_lo_raw']
    bl_slope_hi_raw = cuts[str(run)]['bl_slope_hi_raw']
    bl_sig_lo_raw = cuts[str(run)]['bl_sig_lo_raw']
    bl_sig_hi_raw = cuts[str(run)]['bl_sig_hi_raw']
    ftp_max_lo_raw = cuts[str(run)]['ftp_max_lo_raw']
    ftp_max_hi_raw = cuts[str(run)]['ftp_max_hi_raw'] 
    wf_max_fit_const = cuts[str(run)]['wf_max_fit_const']
    wf_max_fit_offset = cuts[str(run)]['wf_max_fit_offset']

    df = df.query(cuts[str(run)]['muon_cut']).copy()
    df_cut = df

    total_counts = len(df)
    print(f'total counts: {total_counts}')

    for cut_out in cut_keys:
        df_cut = df
        cut_set = cut_keys - set([cut_out])
        cut_full = " and ".join([cuts[str(run)][c] for c in cut_set])
        print(f'Leaving out {cut_out}. \nfull cut: {cut_full}\n')
        
        #have to apply cuts individually instead of using `cut_full` because the total cut string is too long for the query :'(
        for cut in cut_set:
            print(f'applying cut: {cut}')
            df_cut = df_cut.query((cuts[str(run)][cut])).copy()
            cut_counts = len(df.query((cuts[str(run)][cut])).copy())
            percent_surviving = (cut_counts/total_counts)*100.
            print(f'Percentage surviving {cut} cut: {percent_surviving:.2f}')
        
        cut_counts_total = len(df_cut)
        percent_surviving_total = (cut_counts_total/total_counts)*100.
        print(f'Percentage surviving cuts: {percent_surviving_total:.2f}')

        # ____________baseline mean________________________________________

        fig, ax = plt.subplots()
        suptitle = f'Run {run}; All cuts except: {cut_out}\n{percent_surviving_total:.2f}% surviving cuts'
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)
        
        blo, bhi, bpb = 9000, 9400, 1
        nbx = int((bhi-blo)/bpb)

        bl_hist, bins = np.histogram(df_cut['bl'], bins=nbx,
                range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl'], bins=nbx,
                range=[blo, bhi])

        
        plt.semilogy(bins[1:], bl_hist_raw, c='k', alpha=0.3, ds='steps', lw=1., label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label = 'after cuts')
        
        plt.axvline(bl_cut_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(bl_cut_hi_raw, c='r', lw=1)

        plt.xlabel('bl', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.title('Baseline Mean', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
                horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10}) #0.1, 0.75, 
        
        plt.legend(loc='center left')
        
        plt.tight_layout()

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_bl_mean_raw.png', dpi=200)
        plt.clf()
        plt.close()
        


        # ____________baseline slope________________________________________

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)
        
        blo, bhi, bpb = -10., 10., 0.005
        nbx = int((bhi-blo)/bpb)

        bl_hist, bins = np.histogram(df_cut['bl_slope'], bins=nbx,range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl_slope'], bins=nbx,range=[blo, bhi])

        plt.semilogy(bins[1:], bl_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label = 'after cuts')
        
        
        plt.axvline(bl_slope_lo_raw, c='r', lw=1, label = '95% cut lines')
        plt.axvline(bl_slope_hi_raw, c='r', lw=1)


        plt.xlabel('bl_slope', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.title('Baseline Slope', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)
        

        ax.text(0.05, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
                horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})
        
        plt.legend()
        
        plt.tight_layout()

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_bl_slope_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________baseline sigma________________________________________

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        blo, bhi, bpb = 2., 12., 0.005
        nbx = int((bhi-blo)/bpb)

        bl_hist, bins = np.histogram(df_cut['bl_sig'], bins=nbx, range=[blo, bhi])
        bl_hist_raw, bins = np.histogram(df['bl_sig'], bins=nbx, range=[blo, bhi])

        plt.semilogy(bins[1:], bl_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], bl_hist, ds='steps', c='b', lw=1, label = 'after cuts')
        
        
        plt.axvline(bl_sig_lo_raw, c='r', lw=1, label = '95% cut lines')
        plt.axvline(bl_sig_hi_raw, c='r', lw=1)

        plt.xlabel('bl_sigma', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.title('Baseline Sigma', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.9, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
            horizontalalignment='right', transform=ax.transAxes, color='black', fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})
        
        plt.legend(loc='center right')
        
        plt.tight_layout()


        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_bl_sig_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________trapEftp/trapEmax________________________________________

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        elo, ehi = 0.925, 1.01
        e_bins = int((ehi - elo )/0.001)

        ftp_max_hist, bins = np.histogram(df_cut['ftp_max'], bins=e_bins, range=[elo, ehi])
        ftp_max_hist_raw, bins = np.histogram(df['ftp_max'], bins=e_bins, range=[elo, ehi])

        plt.semilogy(bins[1:], ftp_max_hist_raw, ds='steps', c='k', alpha=0.3, lw=1, label='before cuts')
        plt.semilogy(bins[1:], ftp_max_hist, ds='steps', c='b', lw=1, label = 'after cuts')
        
        plt.axvline(ftp_max_lo_raw, c='r', lw=1, label='95% cut lines')
        plt.axvline(ftp_max_hi_raw, c='r', lw=1)


        plt.xlabel('trapEftp/trapEmax', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.title('trapEftp/trapEmax', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.1, 0.75, f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
            horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})
        
        plt.legend(loc='center left')
        
        plt.tight_layout()

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_ftp_max_raw.png', dpi=200)
        plt.clf()
        plt.close()

        # ____________wf_maxVtrapEftp_cal________________________________________

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)
        
        elo, ehi, epb = 0, 5500, 1
        e_bins = 2000 #int((ehi-elo)/epb)
        wflo, wfhi = 0, 15000
        wf_bins = 2000
        wf_maxVEnergy, xedges, yedges = np.histogram2d(df_cut['wf_max'], df_cut['trapEftp_cal'], bins=[wf_bins, e_bins], range=([wflo, wfhi], [elo, ehi]))
        X, Y = np.mgrid[wflo:wfhi:wf_bins*1j, elo:ehi:e_bins*1j]


        pcm = plt.pcolormesh(X, Y, wf_maxVEnergy,norm=LogNorm())
        cb = plt.colorbar()
        cb.set_label("counts", ha = 'right', va='center', rotation=270, fontsize=14)
        cb.ax.tick_params(labelsize=12)

        ax.text(0.1, 0.75,  f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
            horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=12, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 10})
        
        # note: plotting the fit lines is only reliable if you used the same binning as when the fit was done!
        en_bin_centers = pgh.get_bin_centers(xedges)
        cal_en_bin_centers = pgh.get_bin_centers(yedges)
        
        z = (wf_max_fit_const*en_bin_centers + wf_max_fit_offset + 2.*np.sqrt(e_res_const[0]+e_res_const[1]*cal_en_bin_centers +
            (e_res_const[2]*cal_en_bin_centers**2))) 
        plt.plot(en_bin_centers, z, 'r', lw = 0.7, label= 'cut lines')
        
        w = (wf_max_fit_const*en_bin_centers + wf_max_fit_offset - 2.*np.sqrt(e_res_const[0]+e_res_const[1]*cal_en_bin_centers +
            e_res_const[2]*cal_en_bin_centers**2))
        plt.plot(en_bin_centers, w, 'r', lw=0.7)

        ax.set_xlabel('wf_max', fontsize=14)
        ax.set_ylabel('trapEftp_cal (keV)', fontsize=14)
        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)
        plt.title('wf_max vs Energy', horizontalalignment='center', fontsize=14)
        
        plt.legend(loc='lower right')
        
        plt.tight_layout()


        plt.ylim(0, 300)
        plt.xlim(0, 800)

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_lowE.png', dpi=200)

        plt.ylim(1200, 1550)
        plt.xlim(3300, 4300)

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_1460.png', dpi=200)


        plt.ylim(2400, 2750)
        plt.xlim(6600, 8000)

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_wf_max_raw_2615.png', dpi=200)
        plt.clf()
        plt.close()



        # ____________60 keV with fit________________________________________

        pgfenergy_hist, pgfebins, evars = pgh.get_hist(df_cut['trapEftp_cal'], bins=50, range=[54, 65])
        raw_pgfenergy_hist, pgfebins, evars = pgh.get_hist(df['trapEftp_cal'], bins=50, range=[54, 65])
        pars, cov = pgf.gauss_mode_width_max(pgfenergy_hist, pgfebins, evars)
        mode = pars[0]
        width = pars[1]
        amp = pars[2]
        print(f'mode: {mode}')
        print(f'width: {width}')
        print(f'amp: {amp}')


        e_pars, ecov = pgf.fit_hist(cage_utils.gauss_fit_func, pgfenergy_hist, pgfebins, evars, guess = (amp, mode, width, 1))

        mean_fit = e_pars[1]
        width_fit = e_pars[2]
        amp_fit = e_pars[0]
        const_fit = e_pars[3]

        fwhm = width_fit*2.355

        print(f'mean: {mean_fit}')
        print(f'width: {width_fit}')
        print(f'amp: {amp_fit}')
        print(f'C: {const_fit}')
        print(f'FWHM at 60 keV: {fwhm} \n{(fwhm/mean_fit)*100}%')

        fig, ax = plt.subplots()
        fig.suptitle(suptitle, horizontalalignment='center', fontsize=14)

        plt.plot(pgfebins[1:], cage_utils.gauss_fit_func(pgfebins[1:], *e_pars), c = 'r', lw=0.8, label='gaussian fit')
        plt.plot(pgfebins[1:], pgfenergy_hist, ds='steps', c='b', lw=1., label='after cuts')
        plt.plot(pgfebins[1:], raw_pgfenergy_hist, ds='steps', c='k', alpha=0.3, lw=1., label='before cuts')

        plt.xlabel('Energy (keV)', fontsize=14)
        plt.ylabel('counts', fontsize=14)

        plt.title('60 keV peak with gaussian fit', fontsize=14)

        plt.setp(ax.get_xticklabels(), fontsize=12)
        plt.setp(ax.get_yticklabels(), fontsize=12)

        ax.text(0.05, 0.75,  f'r = {radius} mm \ntheta = {angle_det} deg \nruntime {rt_min:.2f}', verticalalignment='bottom',
            horizontalalignment='left', transform=ax.transAxes, color='black', fontsize=10, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})
        ax.text(0.95, 0.72,  f'mean: {mean_fit:.2f} \nsigma: {width_fit:.3f} \nFWHM at 60 keV: {fwhm:.2f} keV\n({(fwhm/mean_fit)*100:.2f}%)', verticalalignment='bottom',
            horizontalalignment='right', transform=ax.transAxes, color='black', fontsize=10, bbox={'facecolor': 'white', 'alpha': 0.5, 'pad': 8})
        
        plt.legend(loc='center right')
        
        plt.tight_layout()

        plt.savefig(f'./plots/{campaign}dataCleaning/N_minus_1/raw/{str(run)}/except_{cut_out}_fit_60keV_raw.png', dpi=200)
        plt.clf()
        plt.close()
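A hypothetical driver call; the cut-key names, run metadata, and the df/dg objects are all assumptions and must match whatever cuts.json and the calling script actually define:

cut_keys = {'bl_cut', 'bl_slope_cut', 'bl_sig_cut', 'ftp_max_cut', 'wf_max_cut'}  # hypothetical names
n_minus_1(run=64, campaign='new_mount/', df=df, dg=dg, runtype='alp', rt_min=30.0,
          radius=14, angle_det=0, rotary=0, cut_keys=cut_keys)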
Example No. 11
def get_hpge_E_peak_par_guess(hist, bins, var, func):
    """ Get parameter guesses for func fit to peak in hist

    Parameters
    ----------
    hist, bins, var: array, array, array
        Histogram of uncalibrated energies, see pgh.get_hist(). Should be
        windowed around the peak.
    func : function
        The function to be fit to the peak in the (windowed) hist
    """
    if func == pgp.gauss_step:
        # pars are: amp, mu, sigma, bkg, step
        # get mu and height from a gauss fit
        pars, cov = pgf.gauss_mode_max(hist, bins, var)
        if pars is None:
            print("get_hpge_E_peak_par_guess: gauss_mode_max failed")
            return []
        mu = pars[0]
        height = pars[1]

        # get bg and step from edges of hist
        bg = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg+step/2, method='interpolate')[0]
            if sigma == 0: raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg+step/2, method='fit_slopes')[0]
            if sigma == 0:
                print("get_hpge_E_peak_par_guess: sigma estimation failed")
                return []

        # now compute amp and return
        height -= (bg + step/2)
        amp = height * sigma * np.sqrt(2 * np.pi)
        return [amp, mu, sigma, bg, step]

    if func == pgp.radford_peak:
        # pars are: mu, sigma, hstep, htail, tau, bg0, amp

        #guess mu, height
        i_0 = np.argmax(hist)
        mu = pgh.get_bin_centers(bins)[i_0]
        height = hist[i_0]

        # get bg and step from edges of hist
        bg0 = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg0

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='interpolate')[0]
            if sigma == 0: raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='fit_slopes')[0]
            if sigma == 0:
                print("get_hpge_E_peak_par_guess: sigma estimation failed")
                return []
        sigma = sigma*.5 # roughly remove some amount due to tail

        # for now hard-coded
        htail = 1./5
        tau = 6.*sigma

        # now compute amp and return
        height -= (bg0 + step/2)
        amp = height / (htail*0.87/35 + (1-htail)/(sigma*np.sqrt(2*np.pi))) #numerical factors from definition of tail_func @ mu

        hstep = step/(2*amp)

        parguess = [mu, sigma, hstep, htail, tau, bg0, amp]

        return parguess
    if func == pgp.radford_peak_wrapped:
        # pars are: mu, sigma, hstep, htail, tau, bg0, amp
        # get mu and height from a gaus fit
        #pars, cov = pgf.gauss_mode_max(hist, bins, var)

        #guess mu, height
        i_0 = np.argmax(hist)
        mu = pgh.get_bin_centers(bins)[i_0]
        height = hist[i_0]

        # get bg and step from edges of hist
        bg0 = np.sum(hist[-5:])/5
        step = np.sum(hist[:5])/5 - bg0

        # get sigma from fwfm with f = 1/sqrt(e)
        try:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='interpolate')[0]
            if sigma == 0: raise ValueError
        except:
            sigma = pgh.get_fwfm(0.6065, hist, bins, var, mx=height, bl=bg0+step/2, method='fit_slopes')[0]
            if sigma == 0:
                print("get_hpge_E_peak_par_guess: sigma estimation failed")
                return []
        sigma = sigma*.5 # roughly remove some amount due to tail

        # for now hard-coded
        htail = 1./5
        tau = 6.*sigma

        # now compute amp and return
        height -= (bg0 + step/2)
        amp = height / (htail*0.87/35 + (1-htail)/(sigma*np.sqrt(2*np.pi))) #numerical factors from definition of tail_func @ mu

        hstep = step/(2*amp)

        # convert to wrapped parameters
        A = amp*(1-htail)
        S = amp*2*hstep
        T = amp*htail

        parguess = [A, mu, sigma, bg0, S, T, tau]

        return parguess
    else:
        print(f'get_hpge_E_peak_par_guess not implemented for {func.__name__}')
        return []
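A sketch of the intended flow, assuming pgp is the peak-shape module this file imports and fit_hist behaves as in Example No. 4; an empty guess signals failure:

guess = get_hpge_E_peak_par_guess(hist, bins, var, pgp.gauss_step)
if len(guess) > 0:
    pars, cov = fit_hist(pgp.gauss_step, hist, bins, var=var, guess=guess)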
Example No. 12
def calibrate_tl208(energy_series, cal_peaks=None, plotFigure=None):
    """
    energy_series: array of energies we want to calibrate
    cal_peaks: array of peaks to fit

    1.) we find the 2614 peak by looking for the tallest peak at >0.1 the max adc value
    2.) fit that peak to get a rough guess at a calibration to find other peaks with
    3.) fit each peak in peak_energies
    4.) do a linear fit to the peak centroids to find a calibration
    """

    if cal_peaks is None:
        cal_peaks = np.array(
            [238.632, 510.770, 583.191, 727.330, 860.564,
             2614.553])  #get_calibration_energies(peak_energies)
    else:
        cal_peaks = np.array(cal_peaks)

    if len(energy_series) < 100:
        return 1, 0

    # get the most prominent high-energy peaks
    max_adc = np.amax(energy_series)
    energy_hi = energy_series  #[ (energy_series > np.percentile(energy_series, 20)) & (energy_series < np.percentile(energy_series, 99.9))]

    # note: assumes a version of get_most_prominent_peaks that returns
    # (peak_energies, peak_e_err); the version in Example No. 8 takes explicit
    # binning arguments and returns only the peak energies
    peak_energies, peak_e_err = get_most_prominent_peaks(energy_hi)
    rough_kev_per_adc, rough_kev_offset = match_peaks(peak_energies, cal_peaks)
    e_cal_rough = rough_kev_per_adc * energy_series + rough_kev_offset

    ###############################################
    #Do a real fit to every peak in peak_energies
    ###############################################
    max_adc = np.amax(energy_series)

    peak_num = len(cal_peaks)
    centers = np.zeros(peak_num)
    fit_result_map = {}
    bin_size = 0.2  #keV

    if plotFigure is not None:
        plot_map = {}

    for i, energy in enumerate(cal_peaks):
        window_width = 10  #keV
        window_width_in_adc = (window_width) / rough_kev_per_adc
        energy_in_adc = (energy - rough_kev_offset) / rough_kev_per_adc
        bin_size_adc = (bin_size) / rough_kev_per_adc

        peak_vals = energy_series[
            (energy_series > energy_in_adc - window_width_in_adc) &
            (energy_series < energy_in_adc + window_width_in_adc)]

        peak_hist, bins = np.histogram(
            peak_vals,
            bins=np.arange(energy_in_adc - window_width_in_adc,
                           energy_in_adc + window_width_in_adc + bin_size_adc,
                           bin_size_adc))
        bin_centers = pgh.get_bin_centers(bins)

        try:
            guess_e, guess_sigma, guess_area = get_gaussian_guess(
                peak_hist, bin_centers)
        except IndexError:
            print("\n\nIt looks like there may not be a peak at {} keV".format(
                energy))
            print("Here is a plot of the area I'm searching for a peak...")
            plt.ion()
            plt.figure(figsize=(12, 6))
            plt.subplot(121)
            plt.plot(bin_centers, peak_hist, color="k", ls="steps")
            plt.subplot(122)
            plt.hist(e_cal_rough, bins=2700, histtype="step")
            input("-->press any key to continue...")
            sys.exit()

        plt.plot(
            bin_centers,
            gauss(bin_centers, guess_e, guess_sigma, guess_area),
            color="b")

        bounds = ([0.9 * guess_e, 0.5 * guess_sigma, 0, 0, 0, 0, 0], [
            1.1 * guess_e, 2 * guess_sigma, 0.1, 0.75, window_width_in_adc, 10,
            5 * guess_area
        ])
        params = fit_binned(
            radford_peak,
            peak_hist,
            bin_centers,
            [guess_e, guess_sigma, 1E-3, 0.7, 5, 0, guess_area],
        )  #bounds=bounds)

        plt.plot(bin_centers, radford_peak(bin_centers, *params), color="r")

        fit_result_map[energy] = params
        centers[i] = params[0]

        if plotFigure is not None:
            plot_map[energy] = (bin_centers, peak_hist)

    #Do a linear fit to find the calibration
    linear_cal = np.polyfit(centers, cal_peaks, deg=1)

    if plotFigure is not None:

        plt.figure(plotFigure.number)
        plt.clf()

        grid = gs.GridSpec(peak_num, 3)
        ax_line = plt.subplot(grid[:, 1])
        ax_spec = plt.subplot(grid[:, 2])

        for i, energy in enumerate(cal_peaks):
            ax_peak = plt.subplot(grid[i, 0])
            bin_centers, peak_hist = plot_map[energy]
            params = fit_result_map[energy]
            ax_peak.plot(
                bin_centers * rough_kev_per_adc + rough_kev_offset,
                peak_hist,
                ls="steps-mid",
                color="k")
            fit = radford_peak(bin_centers, *params)
            ax_peak.plot(
                bin_centers * rough_kev_per_adc + rough_kev_offset,
                fit,
                color="b")

        ax_peak.set_xlabel("Energy [keV]")

        ax_line.scatter(
            centers,
            cal_peaks,
        )

        x = np.arange(0, max_adc, 1)
        ax_line.plot(x, linear_cal[0] * x + linear_cal[1])
        ax_line.set_xlabel("ADC")
        ax_line.set_ylabel("Energy [keV]")

        energies_cal = energy_series * linear_cal[0] + linear_cal[1]
        peak_hist, bins = np.histogram(energies_cal, bins=np.arange(0, 2700))
        ax_spec.semilogy(pgh.get_bin_centers(bins), peak_hist, ds="steps-mid")
        ax_spec.set_xlabel("Energy [keV]")

    return linear_cal
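A minimal call, assuming energy_series is an array of uncalibrated energies; passing a matplotlib figure enables the diagnostic plots:

import matplotlib.pyplot as plt

linear_cal = calibrate_tl208(energy_series, plotFigure=plt.figure())
e_cal = linear_cal[0] * energy_series + linear_cal[1]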