def __init__(self, x, y, magnitude, max_magnitude):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("X", "Y")
    self.fig.set_size_inches(6, 6)
    self.x = x
    self.y = y
    self.magnitude = magnitude
    self.n_foreground = 0
    self.n_background = 0
    self.n_injections = 0
    max_magnitude = math.log10(max_magnitude)
    self.foreground_bins = rate.BinnedArray(rate.NDBins((
        rate.LinearBins(-max_magnitude, max_magnitude, 1024),
        rate.LinearBins(-max_magnitude, max_magnitude, 1024))))
    self.background_bins = rate.BinnedArray(rate.NDBins((
        rate.LinearBins(-max_magnitude, max_magnitude, 1024),
        rate.LinearBins(-max_magnitude, max_magnitude, 1024))))
    self.coinc_injection_bins = rate.BinnedArray(rate.NDBins((
        rate.LinearBins(-max_magnitude, max_magnitude, 1024),
        rate.LinearBins(-max_magnitude, max_magnitude, 1024))))
    self.incomplete_coinc_injection_bins = rate.BinnedArray(rate.NDBins((
        rate.LinearBins(-max_magnitude, max_magnitude, 1024),
        rate.LinearBins(-max_magnitude, max_magnitude, 1024))))
def __init__(self, x_instrument, y_instrument, magnitude, desc, min_magnitude, max_magnitude):
    self.fig, self.axes = SnglBurstUtils.make_burst_plot("%s %s" % (x_instrument, desc), "%s %s" % (y_instrument, desc))
    self.fig.set_size_inches(6, 6)
    self.axes.loglog()
    self.x_instrument = x_instrument
    self.y_instrument = y_instrument
    self.magnitude = magnitude
    self.foreground_x = []
    self.foreground_y = []
    self.n_foreground = 0
    self.n_background = 0
    self.n_injections = 0
    self.foreground_bins = rate.BinnedArray(rate.NDBins((
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024),
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024))))
    self.background_bins = rate.BinnedArray(rate.NDBins((
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024),
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024))))
    self.coinc_injection_bins = rate.BinnedArray(rate.NDBins((
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024),
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024))))
    self.incomplete_coinc_injection_bins = rate.BinnedArray(rate.NDBins((
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024),
        rate.LogarithmicBins(min_magnitude, max_magnitude, 1024))))
def create_efficiency_plot(axes, all_injections, found_injections, detection_threshold, cal_uncertainty):
    filter_width = 16.7

    # format the axes
    axes.semilogx()
    axes.set_position([0.10, 0.150, 0.86, 0.77])

    # set the desired yticks
    axes.set_yticks((0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0))
    axes.set_yticklabels((r"\(0\)", r"\(0.1\)", r"\(0.2\)", r"\(0.3\)", r"\(0.4\)", r"\(0.5\)", r"\(0.6\)", r"\(0.7\)", r"\(0.8\)", r"\(0.9\)", r"\(1.0\)"))
    axes.xaxis.grid(True, which="both")
    axes.yaxis.grid(True, which="both")

    # put the made and found injections in the denominator and
    # numerator of the efficiency bins
    bins = rate.NDBins((rate.LogarithmicBins(min(sim.amplitude for sim in all_injections), max(sim.amplitude for sim in all_injections), 400),))
    efficiency_num = rate.BinnedArray(bins)
    efficiency_den = rate.BinnedArray(bins)
    for sim in found_injections:
        efficiency_num[sim.amplitude,] += 1
    for sim in all_injections:
        efficiency_den[sim.amplitude,] += 1

    # generate and plot the trend curves.  adjust the window
    # function normalization so that the denominator array correctly
    # represents the number of injections contributing to each bin:
    # make w(0) = 1.0.  this factor has no effect on the efficiency
    # because it is common to the numerator and denominator arrays,
    # but it is needed to compute the Poisson error bars, which
    # require the actual counts in each bin
    windowfunc = rate.gaussian_window(filter_width)
    windowfunc /= windowfunc[len(windowfunc) // 2 + 1]
    rate.filter_array(efficiency_num.array, windowfunc)
    rate.filter_array(efficiency_den.array, windowfunc)

    # regularize:  adjust unused bins so that the efficiency is 0,
    # not NaN
    assert (efficiency_num.array <= efficiency_den.array).all()
    efficiency_den.array[(efficiency_num.array == 0) & (efficiency_den.array == 0)] = 1

    line1, A50, A50_err = render_data_from_bins(open("string_efficiency.dat", "w"), axes, efficiency_num, efficiency_den, cal_uncertainty, filter_width, colour="k", linestyle="-", erroralpha=0.2)

    # add a legend to the axes
    axes.legend((line1,), (r"\noindent Injections recovered with $\log \Lambda > %.2f$" % detection_threshold,), loc="lower right")

    # adjust the limits
    axes.set_xlim([3e-22, 3e-19])
    axes.set_ylim([0.0, 1.0])
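
# Hypothetical usage sketch for create_efficiency_plot() (not part of the
# original module):  "all_injections" and "found_injections" are assumed to
# be sequences of sim rows carrying a .amplitude attribute, and the
# threshold and calibration-uncertainty values are placeholders.
fig, axes = SnglBurstUtils.make_burst_plot("Injection Amplitude", "Detection Efficiency")
create_efficiency_plot(axes, all_injections, found_injections, detection_threshold=10.0, cal_uncertainty=0.08)
fig.savefig("string_efficiency.png")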
def finish(self):
    livetime = rate.BinnedArray(self.counts.bins)
    for offsets in self.tisi_rows:
        self.seglists.offsets.update(offsets)
        livetime[offsets[self.x_instrument], offsets[self.y_instrument]] += float(abs(self.seglists.intersection(self.seglists.keys())))
    zvals = self.counts.at_centres() / livetime.at_centres()
    rate.filter_array(zvals, rate.gaussian_window(3, 3))
    xcoords, ycoords = self.counts.centres()
    self.axes.contour(xcoords, ycoords, numpy.transpose(numpy.log(zvals)))
    for offsets in self.tisi_rows:
        if any(offsets.values()):
            # time slide vector is not the zero-lag vector
            self.axes.plot((offsets[self.x_instrument],), (offsets[self.y_instrument],), "k+")
        else:
            # time slide vector is the zero-lag vector
            self.axes.plot((offsets[self.x_instrument],), (offsets[self.y_instrument],), "r+")
    self.axes.set_xlim([self.counts.bins[0].min, self.counts.bins[0].max])
    self.axes.set_ylim([self.counts.bins[1].min, self.counts.bins[1].max])
    self.axes.set_title(r"Coincident Event Rate vs. Instrument Time Offset (Logarithmic Rate Contours)")
def _derivitave_fit(self, farts, FARt, vAs, twodbin):
    """
    Relies on scipy spline fits for each mass bin to find the
    derivative of the volume at a given FAR.  See how this works for
    a simple case where it is clearly given a parabola:  to high
    precision it calculates the proper derivative.

    A = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
    B = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    C = interpolate.splrep(B, A, s=0, k=4)
    interpolate.splev(5, C, der=1)
    10.000
    """
    dA = rate.BinnedArray(twodbin)
    for m1 in range(dA.array.shape[0]):
        for m2 in range(dA.array.shape[1]):
            da = []
            for f in farts.centres():
                da.append(vAs[farts[f]].array[m1][m2])
            # spline fit of volume vs. FAR in this mass bin
            fit = interpolate.splrep(farts.centres(), da, k=4)
            val = interpolate.splev(FARt, fit, der=1)
            #print val
            # FIXME this prevents negative derivatives arising from bad fits
            if val < 0:
                val = 0
            dA.array[m1][m2] = val  # minus the derivative
    return dA
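
# The worked example from the docstring above, written out as a runnable
# check (only scipy.interpolate is assumed):  fit y = x**2 sampled at the
# integers 1..10 with an interpolating spline and evaluate the derivative at
# x = 5, which should come out very close to 10.
from scipy import interpolate

A = [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
B = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
C = interpolate.splrep(B, A, s=0, k=4)
print(interpolate.splev(5, C, der=1))  # ~10.0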
def get_volume_derivative(injfnames, twoDMassBins, dBin, FAR, zero_lag_segments, gw):
    if FAR == 0:
        print("\n\nFAR = 0\n \n")
        # FIXME lambda = ~inf if loudest event is above loudest timeslide?
        output = rate.BinnedArray(twoDMassBins)
        output.array = 10**6 * numpy.ones(output.array.shape)
        return output
    livetime = float(abs(zero_lag_segments))
    FARh = FAR * 100000
    FARl = FAR * 0.001
    nbins = 5
    FARS = rate.LogarithmicBins(FARl, FARh, nbins)
    vA = []
    vA2 = []
    for far in FARS.centres():
        m, f = get_injections(injfnames, far, zero_lag_segments)
        print("computing volume at FAR " + str(far), file=sys.stderr)
        vAt, vA2t = twoD_SearchVolume(f, m, twoDMassBins, dBin, gw, livetime, 1)
        # we need to compute the derivative of the log, per the
        # upper-limit paper
        vAt.array = numpy.log10(vAt.array + 0.001)
        vA.append(vAt)
    # the derivative is calculated with respect to FAR * t
    FARS = rate.LogarithmicBins(FARl * livetime, FARh * livetime, nbins)
    return derivitave_fit(FARS, FAR * livetime, vA, twoDMassBins)
def __init__(self, *args, **kwargs):
    super(LnSignalDensity, self).__init__(*args, **kwargs)
    # initialize SNRPDF
    self.SNRPDF = {}
    for n in range(self.min_instruments, len(self.instruments) + 1):
        for ifo_combos in itertools.combinations(sorted(self.instruments), n):
            self.SNRPDF[ifo_combos] = rate.BinnedArray(rate.NDBins([self.snr_binning] * len(ifo_combos)))
def live_time_array(self, instruments):
    """
    Return an array of live times.  Note that every bin holds the
    same value :) the array form is just a convenience.
    """
    live_time = rate.BinnedArray(self.twoDMassBins)
    live_time.array += 1.0
    live_time.array *= self.livetime[instruments]
    return live_time
def add_contents(self, contents):
    if self.tisi_rows is None:
        # get a list of time slide dictionaries
        self.tisi_rows = contents.time_slide_table.as_dict().values()

        # find the largest and smallest offsets
        min_offset = min(offset for vector in self.tisi_rows for offset in vector.values())
        max_offset = max(offset for vector in self.tisi_rows for offset in vector.values())

        # a guess at the time slide spacing:  works if the time
        # slides are distributed as a square grid over the plot
        # area.  (max - min)^2 gives the area of the time slide
        # square in square seconds;  dividing by the length of the
        # time slide list gives the average area per time slide;
        # taking the square root of that gives the average distance
        # between adjacent time slides in seconds
        time_slide_spacing = ((max_offset - min_offset)**2 / len(self.tisi_rows))**0.5

        # use an average of 3 bins per time slide in each direction,
        # but round to an odd integer
        nbins = int(math.ceil((max_offset - min_offset) / time_slide_spacing * 3))

        # construct the binning
        self.counts = rate.BinnedArray(rate.NDBins((
            rate.LinearBins(min_offset, max_offset, nbins),
            rate.LinearBins(min_offset, max_offset, nbins))))

    self.seglists |= contents.seglists

    for offsets in contents.connection.cursor().execute("""
SELECT
    tx.offset,
    ty.offset
FROM
    coinc_event
    JOIN time_slide AS tx ON (
        tx.time_slide_id == coinc_event.time_slide_id
    )
    JOIN time_slide AS ty ON (
        ty.time_slide_id == coinc_event.time_slide_id
    )
WHERE
    coinc_event.coinc_def_id == ?
    AND tx.instrument == ?
    AND ty.instrument == ?
    """, (contents.bb_definer_id, self.x_instrument, self.y_instrument)):
        try:
            self.counts[offsets] += 1
        except IndexError:
            # beyond the plot boundaries
            pass
def compute_search_efficiency_in_bins(found, total, ndbins, sim_to_bins_function=lambda sim: (sim.distance,)):
    """
    This program creates the search efficiency in the provided
    ndbins.  The first dimension of ndbins must be the distance.
    You also must provide a function that maps a sim inspiral row to
    the correct tuple to index the ndbins.
    """
    num = rate.BinnedArray(ndbins)
    den = rate.BinnedArray(ndbins)

    # increment the numerator with the found injections
    for sim in found:
        num[sim_to_bins_function(sim)] += 1

    # increment the denominator with the total injections
    for sim in total:
        den[sim_to_bins_function(sim)] += 1

    # sanity check
    assert (num.array <= den.array).all(), "some bins have more found injections than were made"

    # regularize by setting empty bins to zero efficiency
    den.array[numpy.logical_and(num.array == 0, den.array == 0)] = 1

    # pull out the efficiency array, it is the ratio
    eff = rate.BinnedArray(rate.NDBins(ndbins), array=num.array / den.array)

    # compute binomial uncertainties in each bin
    k = num.array
    N = den.array
    eff_lo_arr = (N * (2 * k + 1) - numpy.sqrt(4 * N * k * (N - k) + N**2)) / (2 * N * (N + 1))
    eff_hi_arr = (N * (2 * k + 1) + numpy.sqrt(4 * N * k * (N - k) + N**2)) / (2 * N * (N + 1))

    eff_lo = rate.BinnedArray(rate.NDBins(ndbins), array=eff_lo_arr)
    eff_hi = rate.BinnedArray(rate.NDBins(ndbins), array=eff_hi_arr)

    return eff_lo, eff, eff_hi
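
# Minimal usage sketch (hypothetical, not from the original module):  bin
# the injections by distance alone using 10 logarithmic bins between 10 and
# 1000 Mpc.  "found" and "total" are assumed to be sequences of sim rows
# with a .distance attribute;  "rate" is the rate module imported at module
# level in the surrounding code.
ndbins = rate.NDBins((rate.LogarithmicBins(10.0, 1000.0, 10),))
eff_lo, eff, eff_hi = compute_search_efficiency_in_bins(found, total, ndbins)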
def compute_search_volume(eff):
    """
    Integrate the efficiency to get the search volume.
    """
    # get the distance bins
    ndbins = eff.bins
    dx = ndbins[0].upper() - ndbins[0].lower()
    r = ndbins[0].centres()

    # we have one less dimension on the output
    vol = rate.BinnedArray(rate.NDBins(ndbins[1:]))

    # integrate the efficiency to obtain the volume
    vol.array = numpy.trapz(eff.array.T * 4. * numpy.pi * r**2, r, dx)

    return vol
def median_from_lnpdf(ln_pdf):
    #
    # get bin probabilities from bin counts
    #
    assert (ln_pdf.array >= 0.).all(), "PDF contains negative counts"
    cdf = ln_pdf.array.cumsum()
    cdf /= cdf[-1]

    #
    # wrap (CDF - 0.5) in an InterpBinnedArray and use bisect to
    # solve for the 0 crossing.  the interpolator gets confused
    # where the x co-ordinates are infinite and it breaks the bisect
    # function for some reason, so to work around the issue we start
    # the upper and lower boundaries in a little bit from the edges
    # of the domain.  FIXME:  this is stupid, find a root finder
    # that gives the correct answer regardless.
    #
    func = rate.InterpBinnedArray(rate.BinnedArray(ln_pdf.bins, cdf - 0.5))
    median = optimize.bisect(func, ln_pdf.bins[0].lower()[2], ln_pdf.bins[0].upper()[-3], xtol=1e-14, disp=False)

    #
    # check the result (detects when the root finder has failed for
    # some reason)
    #
    assert abs(func(median)) < 1e-13, "failed to find median (got %g)" % median

    #
    # done
    #
    return median
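
# Hedged usage sketch (names hypothetical):  fill a one-dimensional
# BinnedArray with counts drawn from "samples", an assumed iterable of
# floats lying inside the binning's domain, then locate the median of the
# implied distribution.
ln_pdf = rate.BinnedArray(rate.NDBins((rate.LinearBins(0., 10., 100),)))
for x in samples:
    ln_pdf[x,] += 1.
print(median_from_lnpdf(ln_pdf))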
def SNRPDF(instruments, horizon_distances, snr_cutoff, n_samples=200000, bins=rate.ATanLogarithmicBins(3.6, 1e3, 150)):
    """
    Precomputed SNR PDF for each detector.  Returns a BinnedArray
    containing

        P(snr_{inst1}, snr_{inst2}, ... | signal seen in exactly
        {inst1, inst2, ...} in a network of instruments with a given
        set of horizon distances)

    i.e., the joint probability density of observing a set of SNRs
    conditional on them being the result of a signal that has been
    recovered in a given subset of the instruments in a network of
    instruments with a given set of horizon distances.

    The axes of the PDF correspond to the instruments in
    alphabetical order.  The binning used for all axes is set with
    the bins parameter.

    The n_samples parameter sets the number of iterations for the
    internal Monte Carlo sampling loop.
    """
    if n_samples < 1:
        raise ValueError("n_samples=%d must be >= 1" % n_samples)

    # get the instrument names
    instruments = sorted(instruments)
    if len(instruments) < 1:
        raise ValueError(instruments)
    # get the horizon distances in the same order
    DH_times_8 = 8. * numpy.array([horizon_distances[inst] for inst in instruments])
    # get the detector responses in the same order
    resps = [lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in instruments]

    # get the horizon distances and responses of the remaining
    # instruments (order doesn't matter as long as they're in the
    # same order)
    DH_times_8_other = 8. * numpy.array([dist for inst, dist in horizon_distances.items() if inst not in instruments])
    resps_other = tuple(lalsimulation.DetectorPrefixToLALDetector(str(inst)).response for inst in horizon_distances if inst not in instruments)

    # initialize the PDF array, and pre-construct the sequence of
    # snr, d(snr) tuples.  since the last SNR bin probably has
    # infinite size, we remove it from the sequence (meaning the PDF
    # will be left 0 in that bin)
    pdf = rate.BinnedArray(rate.NDBins([bins] * len(instruments)))
    snr_sequence = rate.ATanLogarithmicBins(3.6, 1e3, 500)
    snr_snrlo_snrhi_sequence = numpy.array(list(zip(snr_sequence.centres(), snr_sequence.lower(), snr_sequence.upper()))[:-1])

    # compute the SNR at which to begin iterations over bins
    assert type(snr_cutoff) is float
    snr_min = snr_cutoff - 3.0
    assert snr_min > 0.0

    # we select random uniformly-distributed right ascensions so
    # there's no point in also choosing random GMSTs and any value
    # is as good as any other
    gmst = 0.0

    # run the sampler for the requested number of iterations.  save
    # some symbols to avoid doing module attribute look-ups in the
    # loop
    acos = math.acos
    random_uniform = random.uniform
    twopi = 2. * math.pi
    pi_2 = math.pi / 2.
    xlal_am_resp = lal.ComputeDetAMResponse
    # FIXME:  scipy.stats.rice.rvs broken on reference OS.  switch
    # to it when we can rely on a new-enough scipy
    #rice_rvs = stats.rice.rvs	# broken on reference OS
    # the .reshape is needed in the event that x is a 1x1 array:
    # numpy returns a scalar from sqrt(), but we must have something
    # that we can iterate over
    rice_rvs = lambda x: numpy.sqrt(stats.ncx2.rvs(2., x**2.)).reshape(x.shape)

    for i in range(n_samples):
        # select a random sky location and source orbital plane
        # inclination.  the signal is linearly polarized, and
        # h_cross = 0 is assumed, so we need only F+ (its absolute
        # value).
        ra = random_uniform(0., twopi)
        dec = pi_2 - acos(random_uniform(-1., 1.))
        psi = random_uniform(0., twopi)
        fplus = tuple(abs(xlal_am_resp(resp, ra, dec, psi, gmst)[0]) for resp in resps)

        # ratio of inverse SNR to distance for each instrument (the
        # factor of 1/8 arises because the horizon distance is
        # defined for an SNR of 8, and we've omitted that factor for
        # performance)
        snr_times_D = DH_times_8 * fplus

        # snr * D in the instrument whose SNR grows fastest with
        # decreasing D
        max_snr_times_D = snr_times_D.max()

        # snr_times_D.min() / snr_min = the furthest a source can be
        # and still be above snr_min in all instruments involved.
        # max_snr_times_D / that distance = the SNR that distance
        # corresponds to in the instrument whose SNR grows fastest
        # with decreasing distance --- the SNR the source has in the
        # most sensitive instrument when visible to all instruments
        # in the combo
        try:
            start_index = snr_sequence[max_snr_times_D / (snr_times_D.min() / snr_min)]
        except ZeroDivisionError:
            # one of the instruments that must be able to see the
            # event is blind to it
            continue

        # min_D_other is the minimum distance at which the source
        # becomes visible in an instrument that isn't involved.
        # max_snr_times_D / min_D_other gives the SNR in the most
        # sensitive instrument at which the source becomes visible
        # to one of the instruments not allowed to participate
        if len(DH_times_8_other):
            min_D_other = (DH_times_8_other * fplus).min() / snr_cutoff
            try:
                end_index = snr_sequence[max_snr_times_D / min_D_other] + 1
            except ZeroDivisionError:
                # all the instruments that must not see it are
                # blind to it
                end_index = None
        else:
            # there are no other instruments
            end_index = None

        # if start_index >= end_index then in order for the source
        # to be close enough to be visible in all the instruments
        # that must see it, it is already visible to one or more
        # instruments that must not.  we don't need to check for
        # this explicitly;  the loop that comes next will simply
        # not have any iterations.

        # iterate over the nominal SNRs (= noise-free SNR in the
        # most sensitive instrument) at which we will add weight to
        # the PDF.  from the SNR in the most sensitive instrument,
        # the distance to the source is
        #
        #	D = max_snr_times_D / snr
        #
        # and the (noise-free) SNRs in all instruments are
        #
        #	snr_times_D / D
        #
        # scipy's Rice-distributed RV code is used to add the effect
        # of background noise, converting the noise-free SNRs into
        # simulated observed SNRs
        #
        # number of sources between Dlo and Dhi:
        #
        #	d count \propto D^2 |dD|
        #	count \propto Dhi^3 - Dlo^3
        D_Dhi_Dlo_sequence = max_snr_times_D / snr_snrlo_snrhi_sequence[start_index:end_index]
        for snr, weight in zip(rice_rvs(snr_times_D / numpy.reshape(D_Dhi_Dlo_sequence[:, 0], (len(D_Dhi_Dlo_sequence), 1))), D_Dhi_Dlo_sequence[:, 1]**3. - D_Dhi_Dlo_sequence[:, 2]**3.):
            pdf[tuple(snr)] += weight

    # check for divide-by-zeros that weren't caught.  also finds
    # NaNs if they are there
    assert numpy.isfinite(pdf.array).all()

    # convolve the samples with a Gaussian kernel
    rate.filter_array(pdf.array, rate.gaussian_window(*(1.875,) * len(pdf.array.shape)))
    # protect against round-off in the FFT convolution leading to
    # negative values in the PDF
    numpy.clip(pdf.array, 0., PosInf, pdf.array)

    # zero the counts in bins that are below the trigger threshold.
    # we have to convert SNRs to indexes ourselves and adjust so
    # that we don't zero the bin in which the SNR threshold falls
    range_all = slice(None, None)
    range_low = slice(None, pdf.bins[0][snr_cutoff])
    for i in range(len(instruments)):
        slices = [range_all] * len(instruments)
        slices[i] = range_low
        # zero the below-threshold region along this axis
        pdf.array[tuple(slices)] = 0.

    # convert the bin counts to a normalized PDF
    pdf.to_pdf()

    # one last sanity check
    assert numpy.isfinite(pdf.array).all()

    # done
    return pdf
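
# Hypothetical usage sketch (not from the original module):  the joint SNR
# PDF for an H1,L1 network with assumed horizon distances (in Mpc) and an
# SNR threshold of 4.  The reduced n_samples keeps the Monte Carlo loop
# quick for illustration only.
pdf = SNRPDF(("H1", "L1"), {"H1": 120.0, "L1": 100.0}, snr_cutoff=4.0, n_samples=10000)
print(pdf[5.5, 6.0])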
def twoD_SearchVolume(found, missed, twodbin, dbin, wnfunc, livetime, bootnum=1, derr=0.197, dsys=0.074):
    """
    Compute the search volume in the mass/mass plane, bootstrap and
    measure the first and second moment (assumes the underlying
    distribution can be characterized by those two parameters).
    This is gonna be brutally slow.

    derr = (0.134**2 + 0.103**2 + 0.102**2)**0.5 = 0.197, which is
    the 3-detector calibration uncertainty in quadrature.  This is
    conservative since some injections will be H1L1 and have a
    lower error of 0.17.

    dsys is the DC offset, which is the maximum offset of 0.074.
    """
    if wnfunc:
        wnfunc /= wnfunc[(wnfunc.shape[0] - 1) // 2, (wnfunc.shape[1] - 1) // 2]

    x = twodbin.shape[0]
    y = twodbin.shape[1]
    z = dbin.n

    rArrays = []
    volArray = rate.BinnedArray(twodbin)
    volArray2 = rate.BinnedArray(twodbin)

    # set up the ratio arrays for each distance bin
    for k in range(z):
        rArrays.append(rate.BinnedRatios(twodbin))

    # bootstrap to account for errors
    for n in range(bootnum):
        # initialize by setting these to zero
        for k in range(z):
            rArrays[k].numerator.array = numpy.zeros(rArrays[k].numerator.bins.shape)
            rArrays[k].denominator.array = numpy.zeros(rArrays[k].numerator.bins.shape)

        # scramble the injection population
        if bootnum > 1:
            sm, sf = scramble_pop(missed, found)
        else:
            sm, sf = missed, found

        for l in sf:
            # found
            tbin = rArrays[dbin[scramble_dist(l.distance, derr, dsys)]]
            tbin.incnumerator((l.mass1, l.mass2))
        for l in sm:
            # missed
            tbin = rArrays[dbin[scramble_dist(l.distance, derr, dsys)]]
            tbin.incdenominator((l.mass1, l.mass2))

        # start with a zero array to compute the mean square
        tmpArray2 = rate.BinnedArray(twodbin)
        for k in range(z):
            tbins = rArrays[k]
            tbins.denominator.array += tbins.numerator.array
            if wnfunc:
                rate.filter_array(tbins.denominator.array, wnfunc)
            if wnfunc:
                rate.filter_array(tbins.numerator.array, wnfunc)
            tbins.regularize()
            # logarithmic(d)
            integrand = 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            volArray.array += integrand
            tmpArray2.array += integrand  # 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            print("bootstrapping:\t%.1f%% and Calculating smoothed volume:\t%.1f%%\r" % ((100.0 * n / bootnum), (100.0 * k / z)), end=' ', file=sys.stderr)
        tmpArray2.array *= tmpArray2.array
        volArray2.array += tmpArray2.array

    print("", file=sys.stderr)
    # mean and variance
    volArray.array /= bootnum
    volArray2.array /= bootnum
    volArray2.array -= volArray.array**2  # variance
    volArray.array *= livetime
    volArray2.array *= livetime * livetime  # this gets two powers of live time
    return volArray, volArray2
def __init__(self, efficiency_data, confidence):
    self.rate_array = rate.BinnedArray(efficiency_data.efficiency.denominator.bins)
    self.amplitude_lbl = efficiency_data.amplitude_lbl
    self.confidence = confidence
def twoD_SearchVolume(self, instruments, dbin=None, FAR=None, bootnum=None, derr=0.197, dsys=0.074):
    """
    Compute the search volume in the mass/mass plane, bootstrap and
    measure the first and second moment (assumes the underlying
    distribution can be characterized by those two parameters).
    This is gonna be brutally slow.

    derr = (0.134**2 + 0.103**2 + 0.102**2)**0.5 = 0.197, which is
    the 3-detector calibration uncertainty in quadrature.  This is
    conservative since some injections will be H1L1 and have a
    lower error of 0.17.

    dsys is the DC offset, which is the maximum offset of 0.074.
    """
    if not FAR:
        FAR = self.far[instruments]
    found, missed = self.get_injections(instruments, FAR)
    twodbin = self.twoDMassBins
    wnfunc = self.gw
    livetime = self.livetime[instruments]
    if not bootnum:
        bootnum = self.bootnum

    if wnfunc:
        wnfunc /= wnfunc[(wnfunc.shape[0] - 1) // 2, (wnfunc.shape[1] - 1) // 2]

    x = twodbin.shape[0]
    y = twodbin.shape[1]
    z = int(self.opts.dist_bins)

    rArrays = []
    volArray = rate.BinnedArray(twodbin)
    volArray2 = rate.BinnedArray(twodbin)

    # set up the ratio arrays for each distance bin
    for k in range(z):
        rArrays.append(rate.BinnedRatios(twodbin))

    # bootstrap to account for errors
    for n in range(bootnum):
        # initialize by setting these to zero
        for k in range(z):
            rArrays[k].numerator.array = numpy.zeros(rArrays[k].numerator.bins.shape)
            rArrays[k].denominator.array = numpy.zeros(rArrays[k].numerator.bins.shape)

        # scramble the injection population and distances
        if bootnum > 1:
            sm, sf = self._scramble_pop(missed, found)
            # a separate array of distances speeds up this calculation
            f_dist = self._scramble_dist(sf, derr, dsys)
        else:
            sm, sf = missed, found
            f_dist = numpy.array([l.distance for l in found])

        # compute the distance bins
        if not dbin:
            dbin = rate.LogarithmicBins(min(f_dist), max(f_dist), z)
        #else: print dbin.centres()

        # get rid of all missed injections outside the distance bins
        # to prevent binning errors
        sm, m_dist = self.cut_distance(sm, dbin)
        sf, f_dist = self.cut_distance(sf, dbin)

        for i, l in enumerate(sf):
            # found
            tbin = rArrays[dbin[f_dist[i]]]
            tbin.incnumerator((l.mass1, l.mass2))
        for i, l in enumerate(sm):
            # missed
            tbin = rArrays[dbin[m_dist[i]]]
            tbin.incdenominator((l.mass1, l.mass2))

        # start with a zero array to compute the mean square
        tmpArray2 = rate.BinnedArray(twodbin)
        for k in range(z):
            tbins = rArrays[k]
            tbins.denominator.array += tbins.numerator.array
            if wnfunc:
                rate.filter_array(tbins.denominator.array, wnfunc)
            if wnfunc:
                rate.filter_array(tbins.numerator.array, wnfunc)
            tbins.regularize()
            # logarithmic(d)
            integrand = 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            volArray.array += integrand
            tmpArray2.array += integrand  # 4.0 * pi * tbins.ratio() * dbin.centres()[k]**3 * dbin.delta
            print("bootstrapping:\t%.1f%% and Calculating smoothed volume:\t%.1f%%\r" % ((100.0 * n / bootnum), (100.0 * k / z)), end=' ', file=sys.stderr)
        tmpArray2.array *= tmpArray2.array
        volArray2.array += tmpArray2.array

    print("", file=sys.stderr)
    # mean and variance
    volArray.array /= bootnum
    volArray2.array /= bootnum
    volArray2.array -= volArray.array**2  # variance
    volArray.array *= livetime
    volArray2.array *= livetime * livetime  # this gets two powers of live time
    return volArray, volArray2
def finish(self, threshold):
    # bin the injections
    self._bin_events()

    # use the same binning for the found injection density as was
    # constructed for the efficiency
    self.found_density = rate.BinnedArray(self.efficiency.denominator.bins)

    # construct the amplitude weighting function.  the Gaussian
    # window's width is the number of bins corresponding to 10 units
    # of amplitude, which is computed by dividing 10 by the "volume"
    # of the bin corresponding to threshold.  index is the index of
    # the element in amplitude_weight corresponding to the
    # threshold.
    amplitude_weight = rate.BinnedArray(rate.NDBins((rate.LinearBins(threshold - 100, threshold + 100, 10001),)))
    index, = amplitude_weight.bins[threshold,]
    window = rate.gaussian_window(10.0 / amplitude_weight.bins.volumes()[index])
    window /= 10 * window[(len(window) - 1) // 2]

    # set the window data into the BinnedArray object.  the Gaussian
    # peaks on the middle element of the window, which we want to
    # place at index in the amplitude_weight array.
    lo = index - (len(window) - 1) // 2
    hi = lo + len(window)
    if lo < 0 or hi > len(amplitude_weight.array):
        raise ValueError("amplitude weighting window too large")
    amplitude_weight.array[lo:hi] = window

    # store the recovered injections in the found density bins
    # weighted by amplitude
    for x, y, z in self.recovered_xyz:
        try:
            weight = amplitude_weight[z,]
        except IndexError:
            # beyond the edge of the window
            weight = 0.0
        self.found_density[x, y] += weight

    # the efficiency is only valid up to the highest energy that has
    # been injected.  this creates problems later on so, instead,
    # for each frequency, identify the highest energy that has been
    # measured and copy the values for that bin's numerator and
    # denominator into the bins for all higher energies.  do the
    # same for the counts in the found injection density bins.
    #
    # numpy.indices() returns two arrays, the first of which has
    # each element set equal to its x index, the second has each
    # element set equal to its y index;  we keep the latter.
    # meanwhile numpy.roll() cyclically permutes the efficiency bins
    # down one along the y (energy) axis.  from this, the
    # conditional finds bins whose count is greater than 0 while the
    # count in the bin immediately above it in energy is 0.  we
    # select the elements from the y index array where the
    # conditional is true, and then use numpy.max() along the y
    # direction to return the highest such y index for each x index,
    # which is a 1-D array.  finally, enumerate() is used to iterate
    # over each x index and corresponding y index, and if the y
    # index is not bady (was found) the value from that x-y bin is
    # copied to all higher bins.
    n = self.efficiency.numerator.array
    d = self.efficiency.denominator.array
    f = self.found_density.array
    bady = -1
    for x, y in enumerate(numpy.max(numpy.where((d > 0) & (numpy.roll(d, -1, axis=1) <= 0), numpy.indices(d.shape)[1], bady), axis=1)):
        if y != bady:
            n[x, y + 1:] = n[x, y]
            d[x, y + 1:] = d[x, y]
            f[x, y + 1:] = f[x, y]

    # now do the same for the bins at energies below those that have
    # been measured.
    bady = d.shape[1]
    for x, y in enumerate(numpy.min(numpy.where((d > 0) & (numpy.roll(d, 1, axis=1) <= 0), numpy.indices(d.shape)[1], bady), axis=1)):
        if y != bady:
            n[x, 0:y] = n[x, y]
            d[x, 0:y] = d[x, y]
            f[x, 0:y] = f[x, y]

    diagnostic_plot(self.efficiency.numerator.array, self.efficiency.denominator.bins, r"Efficiency Numerator (Before Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_efficiency_numerator_before.png")
    diagnostic_plot(self.efficiency.denominator.array, self.efficiency.denominator.bins, r"Efficiency Denominator (Before Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_efficiency_denominator_before.png")
    diagnostic_plot(self.found_density.array, self.efficiency.denominator.bins, r"Injections Lost / Unit of Threshold (Before Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_found_density_before.png")

    # smooth the efficiency bins and the found injection density
    # bins using the same 2D window.
    window = rate.gaussian_window(self.window_size_x, self.window_size_y)
    window /= window[tuple((numpy.array(window.shape, dtype="int") - 1) // 2)]
    rate.filter_binned_ratios(self.efficiency, window)
    rate.filter_array(self.found_density.array, window)

    diagnostic_plot(self.efficiency.numerator.array, self.efficiency.denominator.bins, r"Efficiency Numerator (After Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_efficiency_numerator_after.png")
    diagnostic_plot(self.efficiency.denominator.array, self.efficiency.denominator.bins, r"Efficiency Denominator (After Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_efficiency_denominator_after.png")
    diagnostic_plot(self.found_density.array, self.efficiency.denominator.bins, r"Injections Lost / Unit of Threshold (After Averaging)", self.amplitude_lbl, "lalapps_excesspowerfinal_found_density_after.png")

    # compute the uncertainties in the efficiency and its derivative
    # by assuming these to be the binomial counting fluctuations in
    # the numerators.
    p = self.efficiency.ratio()
    self.defficiency = numpy.sqrt(p * (1 - p) / self.efficiency.denominator.array)
    p = self.found_density.array / self.efficiency.denominator.array
    self.dfound_density = numpy.sqrt(p * (1 - p) / self.efficiency.denominator.array)
def compute_volume_vs_mass(found, missed, mass_bins, bin_type, dbins=None, distribution_param=None, distribution=None, limits_param=None, max_param=None, min_param=None):
    """
    Compute the average luminosity an experiment was sensitive to,
    given the sets of found and missed injections and assuming
    luminosity is uniformly distributed in space.

    If distribution_param, distribution and limits_param are not
    None, use an unbinned Monte Carlo integral (which optionally
    takes max_param and min_param) for the volume and its error.
    Otherwise use a simple efficiency * distance**2 binned integral.
    In either case, use the distance bins to return the efficiency
    in each bin.
    """
    # initialize the MC volume integral method:  binned or unbinned
    if distribution_param is not None and distribution is not None and limits_param is not None:
        mc_unbinned = True
    else:
        mc_unbinned = False

    # mean and std estimate for the sensitive volume averaged over
    # the search time
    volArray = rate.BinnedArray(mass_bins)
    vol2Array = rate.BinnedArray(mass_bins)

    # found/missed stats
    foundArray = rate.BinnedArray(mass_bins)
    missedArray = rate.BinnedArray(mass_bins)

    # efficiency over distance and its standard (binomial) error
    effvmass = []
    errvmass = []

    if bin_type == "Mass1_Mass2":
        # two-dimensional case first
        for j, mc1 in enumerate(mass_bins.centres()[0]):
            for k, mc2 in enumerate(mass_bins.centres()[1]):
                # filter out injections not in this mass bin
                newfound = filter_injections_by_mass(found, mass_bins, j, bin_type, k)
                newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type, k)

                foundArray[(mc1, mc2)] = len(newfound)
                missedArray[(mc1, mc2)] = len(newmissed)

                # compute the volume using this injection set
                meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
                effvmass.append(meaneff)
                errvmass.append(efferr)

                if mc_unbinned:
                    meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param, distribution, limits_param, max_param, min_param)

                volArray[(mc1, mc2)] = meanvol
                vol2Array[(mc1, mc2)] = volerr

        return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass

    # one-dimensional binnings
    for j, mc in enumerate(mass_bins.centres()[0]):
        # filter out injections not in this mass bin
        newfound = filter_injections_by_mass(found, mass_bins, j, bin_type)
        newmissed = filter_injections_by_mass(missed, mass_bins, j, bin_type)

        foundArray[(mc,)] = len(newfound)
        missedArray[(mc,)] = len(newmissed)

        # compute the volume using this injection set
        meaneff, efferr, meanvol, volerr = mean_efficiency_volume(newfound, newmissed, dbins)
        effvmass.append(meaneff)
        errvmass.append(efferr)

        # if the unbinned MC calculation is available, do it
        if mc_unbinned:
            meanvol, volerr = volume_montecarlo(newfound, newmissed, distribution_param, distribution, limits_param, max_param, min_param)

        volArray[(mc,)] = meanvol
        vol2Array[(mc,)] = volerr

    return volArray, vol2Array, foundArray, missedArray, effvmass, errvmass
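
# Hypothetical usage sketch (names, the "Chirp_Mass" bin_type label, and
# the bin choices are placeholders):  sensitive volume versus a single mass
# parameter in 10 linear mass bins, using the binned
# efficiency * distance**2 integral over 50 linear distance bins.
mass_bins = rate.NDBins((rate.LinearBins(0.8, 1.8, 10),))
dbins = rate.LinearBins(0.0, 500.0, 50)
vols, vol_errors, nfound, nmissed, effvmass, errvmass = compute_volume_vs_mass(found, missed, mass_bins, "Chirp_Mass", dbins=dbins)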