def YeoJohn(x, blambda):
    """
    Apply the Yeo-Johnson power transformation to data x.

    Parameters
    ----------
    x: 1D array
        one dimensional array (or sequence) of data
    blambda: float number
        power transformation parameter

    Returns
    -------
    out: 1D array
        power transformation result, always of floating point dtype;
        NaN entries of x are passed through unchanged
    """
    # Work on a float copy: with an integer input the transformed values
    # would otherwise be truncated when written back into the array.
    out = array(x, dtype=float)
    # Both masks are taken before anything is modified, so each element is
    # transformed exactly once according to the sign of the *input* value.
    non_negative = out >= 0.0
    negative = out < 0.0
    if nany(non_negative):
        if blambda != 0.0:
            out[non_negative] = (
                ((out[non_negative] + 1.0)**blambda) - 1.0) / blambda
        else:
            # limit of the expression above for blambda -> 0
            out[non_negative] = log(out[non_negative] + 1.0)
    if nany(negative):
        if blambda != 2.0:
            out[negative] = -((((1.0 - out[negative])**(2.0 - blambda)) - 1.0)
                              / (2.0 - blambda))
        else:
            # limit of the expression above for blambda -> 2
            out[negative] = -log(1.0 - out[negative])
    return out
def YeoJohnInv(x, blambda):
    """
    Apply the inverse Yeo-Johnson power transformation to data x.

    Parameters
    ----------
    x: 1D array
        one dimensional array (or sequence) of transformed data
    blambda: float number
        power transformation parameter

    Returns
    -------
    out: 1D array
        inverse power transformation result, always of floating point
        dtype; NaN entries of x are passed through unchanged
    """
    # Work on a float copy: with an integer input the results would
    # otherwise be truncated when written back into the array.
    out = array(x, dtype=float)
    # Masks are computed once, from the untouched input values.
    non_negative = out >= 0.0
    negative = out < 0.0
    if nany(non_negative):
        if blambda != 0.0:
            out[non_negative] = (
                (out[non_negative] * blambda + 1.0)**(1.0 / blambda)) - 1.0
        else:
            out[non_negative] = exp(out[non_negative]) - 1.0
    if nany(negative):
        if blambda != 2.0:
            out[negative] = 1.0 - (
                (1.0 - out[negative] * (2.0 - blambda))
                **(1.0 / (2.0 - blambda)))
        else:
            out[negative] = 1.0 - exp(-out[negative])
    return out
def Hurst(x):
    """
    Calculates Hurst exponent, long term memory in array.
    For a random walk process it is close to 0.5.
    For non-stationary data it goes above 1.
    For data with any periodic pattern the Hurst exponent lies in the
    interval [0.75, 0.9].

    Parameters
    ----------
    x: 1D array
        one dimensional array

    Returns
    -------
    hexp: float
        Hurst exponent value, normally it lies in the interval [0, 1].
        For fewer than 16 points the plain R/S statistic of x is returned
        instead of a regression slope; None is returned when x is empty or
        has no length.
    """

    def half(N):
        # halve every segment: insert the midpoint between consecutive
        # boundaries; boundaries stay integers so they can be used as slices
        midpoints = N[:-1] + trunc((diff(N) + 1.0) / 2.0)
        return sort(append(N, midpoints)).astype(int)

    def rscalc(segment):
        # simple rescaled range (R/S) statistic of one segment
        y = cumsum(segment - mean(segment))
        R = max(y) - min(y)
        S = std(segment, ddof=1)
        return R / S

    try:
        data = array(x)
        n = len(data)
    except Exception:
        # scalars / objects without a length are treated as "no data"
        n = 0

    if n >= 16:
        X = array(n)
        Y = array(rscalc(data))
        # Integer boundaries: float slice indices raise TypeError in NumPy.
        N = array([0, n // 2, n])
        # calculating R/S for successively halved segments of x
        while min(diff(N)) >= 8:
            lengths = []
            rs_values = []
            for i in range(1, len(N)):
                rs_values.append(rscalc(data[N[i - 1]:N[i]]))
                lengths.append(N[i] - N[i - 1])
            X = append(X, mean(array(lengths)))
            Y = append(Y, mean(array(rs_values)))
            N = half(N)
        nan_mask = isnan(Y)
        if nany(nan_mask):
            X = X[~nan_mask]
            Y = Y[~nan_mask]
        X = column_stack((log(X), ones(len(X))))
        Y = log(Y)
        # linear regression between log(n) and log(R/S) values;
        # the slope is the Hurst exponent
        hexp = lstsq(X, Y)[0][0]
    else:
        if n > 0:
            hexp = rscalc(data)
        else:
            hexp = None
    return hexp
def plot_vb(self, pos, vbs, **args):
    """
    Plot the variational bound history ``vbs`` on the axes at ``pos``.

    Every iteration at which the bound decreased is marked with a red
    cross and labelled, and the final value is highlighted. When ``vbs``
    is None, empty or contains NaN, a placeholder text is drawn instead.

    Parameters
    ----------
    pos :
        position passed through to ``self.get_ax``
    vbs : 1D array or None
        variational bound value per iteration
    **args :
        optional ``cspan`` and ``rspan`` (both default to 1), forwarded to
        ``self.get_ax``

    The axes obtained from ``self.get_ax`` are stored on ``self.ax``.
    """
    # numpy helpers are imported locally, including ``arange`` which is
    # needed below, so the method does not depend on module-level names.
    from numpy import any as nany
    from numpy import arange
    from numpy import asarray
    from numpy import diff
    from numpy import isnan

    cspan = args.get('cspan', 1)
    rspan = args.get('rspan', 1)
    self.ax = self.get_ax(pos, rspan=rspan, cspan=cspan)

    if vbs is not None:
        # fancy indexing and .min()/.max() below need an ndarray
        vbs = asarray(vbs)

    # An empty history has no last value to annotate, so it is treated
    # like missing data instead of failing on vbs[-1].
    if vbs is not None and vbs.size > 0 and not nany(isnan(vbs)):
        len_vb = len(vbs)
        self.ax.plot(vbs, label='vbs')

        # indices (shifted by one) at which the bound went down
        vb_df = diff(vbs)
        vb_df_neg_idx = arange(len(vb_df))[vb_df < 0] + 1
        vb_negs = vbs[vb_df_neg_idx]
        self.ax.plot(vb_df_neg_idx, vb_negs, 'rx', ms=10)
        for i, v in zip(vb_df_neg_idx, vb_negs):
            self.ax.text(i, v, '%d: %f' % (i, v), ha='right', va='top')

        # highlight the last iteration
        x, y = len_vb - 1, vbs[-1]
        txt = 'Itr:%d,VB:%f' % (x, y)
        self.ax.plot(x, y, 'bo')
        self.ax.text(x, y, txt, fontsize='small', ha='right', va='top')

        xlim = (0, len_vb)
        ylim = (vbs.min(), vbs.max() + 10)
    else:
        self.ax.text(0, 0, 'No Variational Bound Values',
                     va='center', ha='center')
        xlim = (-10, 10)
        ylim = (-10, 10)

    title = 'variational bound'
    self._decos_str(title=title)
    self._decos_grid(xlim=xlim, ylim=ylim)
def CalcOnGroups(div_groups, func_calc, func_def=Median):
    """
    Evaluate func_calc on divided groups of data, with a fallback value.

    Parameters
    ----------
    div_groups: 2D array
        two dimensional array of data
    func_calc: function name
        function applied to every 1-D slice of div_groups taken along
        axis 0
    func_def: function name
        function evaluated once on the whole of div_groups; its result
        replaces the entries for which func_calc produced NaN

    Returns
    -------
    der_vals: 1D array
        derived values from div_groups using func_calc
    """
    # the fallback is always computed, even when it ends up unused
    fallback = func_def(div_groups)
    results = apply_along_axis(func_calc, 0, div_groups)
    failed = isnan(results)
    if nany(failed):
        failed_positions = where(failed)[0]
        results[failed_positions] = fallback
    return results
def StatLen(itseries, iobs, xhint=False, xper=False, nchanges=None):
    """
    Detecting stationary changes in time series itseries using different
    methods.

    Parameters
    ----------
    itseries: 1D array
        numpy array or other array that can be converted into numpy array
    iobs: integer
        number of observations per day
    xhint: boolean
        indicates if data has bad behavior, to treat with robust methods
    xper: boolean
        indicates if data is periodic or not, to treat with less robust
        methods
    nchanges: integer or None
        when given and iobs >= 24, overrides the number of change points
        requested from MultipleChange

    Returns
    -------
    stati2: dictionary
        contains two lists named "strong" and "weak";
        "strong" has stationary change indices with strong changes,
        "weak" has stationary change indices with weak changes.
        Both always start with 0 and end with the last index of the
        series.
    """
    intseries = array(itseries)
    empty_array = empty(0, dtype=int)
    lchanges = empty_array
    l3dstat = 0
    trend_det = 0
    nobsi = intseries.size
    stati2 = {"strong": [0, (nobsi - 1)], "weak": [0, (nobsi - 1)]}
    obsi = iobs

    # number of change points to look for: about one per three days
    if (nobsi - 2 * obsi) >= 0:
        rnch = int(round(nobsi / (3 * obsi)))
    else:
        rnch = 0
    if nchanges is not None and obsi >= 24:
        rnch = nchanges
    if rnch > 0:
        lchanges = MultipleChange(intseries, Q=rnch)
    if (lchanges.size < rnch) or (not xper):
        xchange = SingleChange(intseries)
    else:
        xchange = empty_array

    if not xper:
        if nobsi > 3 * obsi:
            trend_det = TestS(intseries[(nobsi - 2 * obsi):]) * TestS(
                intseries[(nobsi - obsi):])
            l3dstat = TestMV(intseries, (nobsi - obsi), obsi) + TestA(
                intseries, (nobsi - obsi), obsi)
        if (nobsi > (3 * obsi)) and (l3dstat > 0):
            # robust search restricted to the last three days
            lchanges0 = MultipleChange(intseries[(nobsi - 3 * obsi):],
                                       Q=3, robust=True)
            if lchanges0.size > 0:
                # shift back to indices of the full series
                lchanges0 = lchanges0 + (nobsi - 3 * obsi)
        else:
            lchanges0 = empty_array
        if (lchanges.size > 0) and (lchanges0.size > 0):
            # flag the robust change points for which some (w - lchanges)
            # difference is below 3
            dchanges = [nany((w - lchanges) < 3) for w in lchanges0]
            if sum(dchanges) > 0:
                lchanges0 = lchanges0[~array(dchanges)]
            if lchanges0.size > 0:
                lchanges = sort(append(lchanges, lchanges0))
        if (lchanges.size == 0) and (lchanges0.size > 0):
            lchanges = lchanges0
        if (lchanges.size > 0) and (xchange.size > 0) and (
                min(abs(lchanges - xchange)) >= 3):
            lchanges = sort(append(lchanges, xchange))
        else:
            if (lchanges.size == 0) and (xchange.size > 0):
                lchanges = xchange
    else:
        if (lchanges.size > 0) and (xchange.size > 0):
            if not nany(lchanges == xchange):
                lchanges = sort(append(lchanges, xchange))
        if (lchanges.size == 0) and (xchange.size > 0):
            lchanges = xchange

    if lchanges.size > 0:
        lchanges = sort(lchanges)
        if trend_det > 0:
            lchanges = lchanges[lchanges <= (nobsi - obsi - 1)]
        # List comprehensions instead of array(map(lambda(w): ...)): the
        # tuple-parameter lambda is a syntax error on Python 3 and map()
        # no longer returns a list there.
        ltest = array([
            TestA(intseries, w, obsi) + TestMV(intseries, w, obsi) +
            TestK(intseries, w, obsi, 2) for w in lchanges
        ])
        if xhint:
            kltest = array([TestK(intseries, w, obsi, 0) for w in lchanges])
        else:
            kltest = array([TestK(intseries, w, obsi, 1) for w in lchanges])
        if (not xhint) and nany(ltest > 0.19):
            stati2["strong"] = [0] + lchanges[ltest > 0.19].tolist() + [
                nobsi - 1]
        if xhint and nany(ltest > 0.06):
            stati2["strong"] = [0] + lchanges[ltest > 0.06].tolist() + [
                nobsi - 1]
        if nany(kltest > 0.06):
            stati2["weak"] = [0] + lchanges[kltest > 0.06].tolist() + [
                nobsi - 1]
    return stati2
def source_prob(config, ra, dec, zs, fluxes, flux_errs, ews_obs, ew_err, c_obs,
                which_color, addl_fluxes, addl_fluxes_error, addl_line_names,
                flim_file, h=0.67, ignore_noise=False, extended_output=False):
    """
    Return the probability that each source is an LAE rather than an OII
    emitter, computed as n_LAE / (n_LAE + n_OII) from the luminosity
    function, volume, equivalent width and additional emission line
    factors.

    Parameters
    ----------
    config : ConfigParser
        configuration object
    ra, dec, zs, fluxes, flux_errs, ews_obs, ew_err, c_obs : array
        positions, redshifts (assuming LAE), line fluxes, errors on line
        fluxes, equivalent widths, errors on EW and colours of the
        sources. (XXX Latter two not used!)
    which_color : str
        which colour is given. Colour not used!
    addl_fluxes, addl_fluxes_error : 2D array
        each i of addl_fluxes[i, :] should correspond, in order, to the
        fluxes measured for each source for the emission line named at
        position i in addl_line_names. To not use set to None (not an
        array of None!)
    addl_line_names : array
        names of emission lines stored in addl_fluxes, in the correct
        order (see above). To not use set to None (not an array of None!)
    flim_file : str
        file containing the flux limits (here for compatibility with
        Leung+ 2016 style API, not used here)
    h : float
        Hubbles constant/100 (only reported in the log message here)
    ignore_noise : bool
        ignore the noise on the input parameters and assume they are
        perfect
    extended_output : bool
        Return extended output

    Returns
    -------
    prob_lae_given_data :
        probability source is an LAE
    prob_lae_given_data_justlum :
        if extended_output, probability computed just from flux and
        redshift
    prob_lae_given_data_lum_ew :
        if extended_output, probability computed just from flux, redshift
        and equivalent width
    prob_lae_given_data_lum_lines :
        if extended_output, probability computed just from flux, redshift
        and the flux in other emission lines
    """
    lae_ew = EquivalentWidthAssigner.from_config(config, 'LAE_EW')
    oii_ew = EquivalentWidthAssigner.from_config(config, "OII_EW")
    lf_lae = LuminosityFunction.from_config(config, "LAE_LF",
                                            ew_assigner=lae_ew)
    lf_oii = LuminosityFunction.from_config(config, "OII_LF")
    cosmo = generate_cosmology_from_config(config)
    oii_zlim = config.getfloat("General", "oii_zlim")

    _logger.info("Using Hubbles Constant of {:f}".format(h*100))

    interp_ew = True
    lae_ew_obs = InterpolatedParameter(config.get("InterpolatedEW", "lae_file"),
                                       "EW_BCENS")
    oii_ew_obs = InterpolatedParameter(config.get("InterpolatedEW", "oii_file"),
                                       "EW_BCENS")

    if ignore_noise:
        # fall back to the noise-free EW distributions
        lae_ew_obs = lae_ew
        oii_ew_obs = oii_ew
        interp_ew = False

    oii_ew_max = config.getfloat("InterpolatedEW", "oii_ew_max")

    # Cast everything to arrays
    ra = array(ra)
    dec = array(dec)
    zs = array(zs)
    fluxes = array(fluxes)
    ews_obs = array(ews_obs)
    c_obs = array(c_obs)

    wl_lae = config.getfloat("wavelengths", "LAE")
    wl_oii = config.getfloat("wavelengths", "OII")
    wls = (zs + 1.0)*wl_lae
    zs_oii = wls/wl_oii - 1.0

    # Compute the volume elements
    dvol_lae = return_delta_volume(wls, wl_lae, cosmo, wl_lae=wl_lae)
    dvol_oii = return_delta_volume(wls, wl_oii, cosmo, wl_lae=wl_lae)

    # Remove source too close to be mistaken for LAEs and therefore removed
    # from catalogue (follows argument used for Leung+ 2017)
    dvol_oii[zs_oii < oii_zlim] = 0.0

    # EW factors
    ew_n_lae = return_ew_n(ews_obs, zs, lae_ew_obs, interp_ew=interp_ew)
    ew_n_oii = return_ew_n(ews_obs, zs_oii, oii_ew_obs, interp_ew=interp_ew)

    # Always LAE according to Leung+ (seems to be true in the sims, very,
    # very rare for OII). Might need to change if EW_ERR grows for OII
    ew_n_oii[ews_obs < 0.0] = 0.0
    ew_n_lae[ews_obs < 0.0] = 1.0

    # Upper limit of the OII EW tabulation - assume LAE
    ew_n_oii[ews_obs > oii_ew_max] = 0.0
    ew_n_lae[ews_obs > oii_ew_max] = 1.0

    # Luminosity function factors
    if ignore_noise:
        lf_n_lae = return_lf_n(fluxes, zs, lf_lae, cosmo)
        lf_n_oii = return_lf_n(fluxes, zs_oii, lf_oii, cosmo)
    else:
        fluxes_lae = InterpolatedParameter(
            config.get("InterpolatedFlux", "lae_file"), "FLUX_OBS_BCENS")
        fluxes_oii = InterpolatedParameter(
            config.get("InterpolatedFlux", "oii_file"), "FLUX_OBS_BCENS")
        lf_n_lae = lf_n_interp(fluxes, zs, lf_lae, fluxes_lae, cosmo)
        lf_n_oii = lf_n_interp(fluxes, zs_oii, lf_oii, fluxes_oii, cosmo)

    # Add additional lines to classification probability (if they're there)
    n_lines_lae = 1.0
    n_lines_oii = 1.0
    if addl_line_names is not None:
        for line_name, taddl_fluxes, taddl_fluxes_errors in zip(
                addl_line_names, addl_fluxes[:], addl_fluxes_error[:]):

            tn_lines_lae, tn_lines_oii = n_additional_line(
                fluxes, flux_errs, taddl_fluxes, taddl_fluxes_errors,
                config.getfloat("RelativeLineStrengths", line_name),
                ignore_noise=ignore_noise)

            if nany(tn_lines_lae < 0.0) or nany(tn_lines_oii < 0.0):
                dodgy_is = tn_lines_lae < 0.0
                # The logger needs a format string as first argument;
                # passing the arrays directly makes the record unformattable.
                _logger.warning(
                    "Negative values for line %s: n_lines_lae=%s, "
                    "fluxes=%s, addl_fluxes=%s, zs_oii=%s",
                    line_name, tn_lines_lae[dodgy_is], fluxes[dodgy_is],
                    taddl_fluxes[dodgy_is], zs_oii[dodgy_is])
                _logger.warning("The line {:s} results in some negative probabilities".format(line_name))

            # Not an LAE or an OII?
            neither = (tn_lines_lae + tn_lines_oii) < 1e-30
            if nany(neither):
                _logger.warning("Source is neither OII or LAE based off of other emission lines")

            n_lines_lae *= tn_lines_lae
            n_lines_oii *= tn_lines_oii

    # Compute expected number based off of L and z
    nlae = lf_n_lae*dvol_lae
    noii = lf_n_oii*dvol_oii
    prob_lae_given_data_justlum = nlae/(nlae + noii)

    # Include EW information but not emission lines
    nlae_ew = nlae*ew_n_lae
    noii_ew = noii*ew_n_oii
    prob_lae_given_data_lum_ew = nlae_ew/(nlae_ew + noii_ew)

    # Include emission lines but not EW
    nlae_lines = nlae*n_lines_lae
    noii_lines = noii*n_lines_oii
    prob_lae_given_data_lum_lines = nlae_lines/(nlae_lines + noii_lines)

    # Include everything (default return)
    nlae = nlae*n_lines_lae*ew_n_lae
    noii = noii*n_lines_oii*ew_n_oii
    prob_lae_given_data = nlae/(nlae + noii)

    if not extended_output:
        return prob_lae_given_data
    return (prob_lae_given_data, prob_lae_given_data_justlum,
            prob_lae_given_data_lum_ew, prob_lae_given_data_lum_lines)
def integrate_lf_limits(self, config, ra, dec, zs, lmins, lmaxes, lambda_, cosmo):
    """
    Integrate the luminosity function between specific luminosity limits.
    Sources that are not detectable keep a sentinel normalisation of
    -99 and a range integral of 0 (i.e. deals with low redshift limits
    for OII and LAE).

    Parameters
    ----------
    config : ConfigParser
        configuration object
    ra, dec, zs : array
        locations to integrate the LF, redshift to set the LF parameters
        (ra and dec are not used by this method)
    lmins, lmaxes : float array
        arrays of limits, each row corresponds to ra, dec and zs
    lambda_ : float
        wavelength of line (for z -> wl conversion)
    cosmo :
        cosmology object providing ``luminosity_distance``

    Returns
    -------
    range_integrals : array
        Integral of LF between lmin and lmax
    norms : array
        Integral of LF from flux limit upwards

    Raises
    ------
    TooFaintForLimitsException
        if any lmin lies below the limiting luminosity
    NotImplementedError
        if ``self.flux_errors`` is set
    """
    # zs may be a plain sequence; an array is needed for masks/indexing
    zs_arr = array(zs)

    # Luminosity limits
    wls = (1.0 + zs_arr) * lambda_

    # For flux limit need zs assuming LAE
    zlaes = wls / config.getfloat("wavelengths", "LAE") - 1.0
    llims = self.flims(zlaes) * (
        4.0 * pi * square(cosmo.luminosity_distance(zs).to('cm').value))

    if len(llims) == 1:
        if lmins < llims:
            raise TooFaintForLimitsException(
                "There are Lmin values less than the limiting L")
    else:
        too_faint = lmins < llims
        if nany(too_faint):
            # logging needs a format string as its first argument
            _logger.error(
                "%d Lmin values below the limiting L: lmins=%s, llims=%s",
                len(lmins[too_faint]), lmins[too_faint], llims[too_faint])
            raise TooFaintForLimitsException(
                "There are Lmin values less than the limiting L")

    # Only the case without flux errors is implemented: integrate the
    # luminosity function analytically
    if self.flux_errors:
        raise NotImplementedError()

    # No need to do phi* and stuff here
    phistars = self.lf.phi_star_func(zs)
    lstars = self.lf.Lstars_func(zs)
    alphas = self.lf.alphas_func(zs)

    # Integrate gamma function
    range_integrals = zeros(len(alphas))
    norms = -99.0 * ones(len(alphas))

    # Sources in range (limit available and physical redshift)
    detectable = (array(llims) >= 0.1) & (zs_arr >= 0.0)

    # Get lmax from lf
    lmaxes_for_norm = self.lf.return_lmax_at_z(zs)

    # LMAX CHOICE NOT TESTED!!!
    norms[detectable] = phistars[detectable] * gamma_integral_limits(
        alphas[detectable] + 1,
        (llims / lstars)[detectable],
        (1000 * lmaxes_for_norm / lstars)[detectable])

    # Sources that are too bright are unlikely candidates and are left at
    # zero (otherwise Gamma integral fails)
    not_too_bright_det = (lmins / lstars < 25.0) & detectable
    range_integrals[not_too_bright_det] = phistars[
        not_too_bright_det] * gamma_integral_limits(
            alphas[not_too_bright_det] + 1.0,
            (lmins / lstars)[not_too_bright_det],
            (lmaxes / lstars)[not_too_bright_det])

    # Correct n-density for the EW
    if self.lf.ew_assigner:
        norms[detectable] *= self.lf.ew_assigner.classification_correction(
            zs_arr[detectable])
        range_integrals[
            not_too_bright_det] *= self.lf.ew_assigner.classification_correction(
                zs_arr[not_too_bright_det])

    return range_integrals, norms
def source_prob(config, ra, dec, zs, fluxes, flux_errs, ews_obs, ew_err, c_obs,
                which_color, addl_fluxes, addl_fluxes_error, addl_line_names,
                flim_file, extended_output=False):
    """
    Return the posterior odds
    P(DATA|LAE)P(LAE)/(P(DATA|OII)P(OII)) and the posterior probability
    P(LAE|DATA) given input information about the sources.

    Parameters
    ----------
    config : ConfigParser
        configuration object
    ra, dec, zs, fluxes, flux_errs, ews_obs, ew_err, c_obs : array
        positions, redshifts (assuming LAE), line fluxes, errors on line
        fluxes, equivalent widths, errors on EW and colours of the
        sources (latter two not used!)
    which_color : str
        which colour is given
    addl_fluxes, addl_fluxes_error : 2D array
        each i of addl_fluxes[i, :] should correspond, in order, to the
        fluxes measured for each source for the emission line named at
        position i in addl_line_names. To not use set to None (not an
        array of None!)
    addl_line_names : array
        names of emission lines stored in addl_fluxes, in the correct
        order (see above). To not use set to None (not an array of None!)
    flim_file : str
        file containing the flux limits
    extended_output : bool
        Return extended output

    Returns
    -------
    posterior_odds, prob_lae_given_data : float arrays
        posterior_odds = P(DATA|LAE)P(LAE)/(P(DATA|OII)P(OII))
        P(LAE|DATA) = P(DATA|LAE)*P(LAE)/(P(DATA|LAE)*P(LAE) +
        P(DATA|OII)*P(OII))
    prob_data_lae, prob_data_oii, prior_lae, prior_oii : float arrays
        only if extended_output: the likelihoods and priors that went
        into the two quantities above
    """
    lae_ew = EquivalentWidthAssigner.from_config(config, 'LAE_EW')
    oii_ew = EquivalentWidthAssigner.from_config(config, "OII_EW")
    lf_lae = LuminosityFunction.from_config(config, "LAE_LF",
                                            ew_assigner=lae_ew)
    lf_oii = LuminosityFunction.from_config(config, "OII_LF")
    cosmo = generate_cosmology_from_config(config)
    oii_zlim = config.getfloat("General", "oii_zlim")

    _logger.info("Using Hubbles Constant of {:f}".format(cosmo.H0))

    # Cast everything to arrays
    ra = array(ra)
    dec = array(dec)
    zs = array(zs)
    fluxes = array(fluxes)
    ews_obs = array(ews_obs)
    c_obs = array(c_obs)

    # redshift the line would have if it were OII instead of LAE
    zs_oii = ((zs + 1.0) * config.getfloat("wavelengths", "LAE")
              ) / config.getfloat("wavelengths", "OII") - 1.0

    # Probability of equivalent widths
    prob_ew_lae = ew_prob(ews_obs, zs, lae_ew)
    prob_ew_oii = ew_prob(ews_obs, zs_oii, oii_ew)

    # Deal with negative and huge EW as in Leung+ , negative or huge EW only
    # for really noisey continuum values which should be for LAEs
    extreme_ew = (ews_obs < 0.0) | (ews_obs > 5000.0)
    prob_ew_lae[extreme_ew] = 1.0
    prob_ew_oii[extreme_ew] = 0.0

    # Add additional lines to classification probability
    prob_lines_lae = 1.0
    prob_lines_oii = 1.0
    if addl_line_names is not None:
        for line_name, taddl_fluxes, taddl_fluxes_errors in zip(
                addl_line_names, addl_fluxes[:], addl_fluxes_error[:]):

            rlstrgth = config.getfloat("RelativeLineStrengths", line_name)
            tprob_lines_lae, tprob_lines_oii = prob_additional_line(
                line_name, fluxes, flux_errs, taddl_fluxes,
                taddl_fluxes_errors, rlstrgth)

            if nany(tprob_lines_lae < 0.0) or nany(tprob_lines_oii < 0.0):
                _logger.warning(
                    "Negative probability for line {:s}".format(line_name))

            # Not an LAE or an OII?
            neither = (tprob_lines_lae + tprob_lines_oii) < 1e-30
            if nany(neither):
                # the placeholder was previously never filled in
                _logger.warning(
                    "Emission line {:s} doesn't look like it's from OII or LAE!"
                    .format(line_name))

            prob_lines_lae *= tprob_lines_lae
            prob_lines_oii *= tprob_lines_oii

    # Carry out integrals of the luminosity function
    _logger.info('Computing OII posteriors')
    prob_flux_oii, noiis = luminosity_likelihoods(
        config, ra, dec, zs_oii, fluxes, lf_oii,
        config.getfloat("wavelengths", "OII"), flim_file, cosmo,
        delta_l=0.05)
    _logger.info('Computing LAE posteriors')
    prob_flux_lae, nlaes = luminosity_likelihoods(
        config, ra, dec, zs, fluxes, lf_lae,
        config.getfloat("wavelengths", "LAE"), flim_file, cosmo,
        delta_l=0.05)

    # Compute the LAE/OII priors
    prior_oii = noiis / (nlaes + noiis)
    prior_lae = nlaes / (nlaes + noiis)

    # P(DATA|LAE), P(DATA|OII)
    prob_data_lae = prob_ew_lae * prob_flux_lae * prob_lines_lae
    prob_data_oii = prob_flux_oii * prob_ew_oii * prob_lines_oii

    # Remove anything with too low an OII redshift
    low_z_oii = zs_oii < oii_zlim
    prob_data_oii[low_z_oii] = 0.0
    prior_oii[low_z_oii] = 0.0
    prior_lae[low_z_oii] = 1.0

    prob_data = prob_data_lae * prior_lae + prob_data_oii * prior_oii

    # Ignore div0 errors (the affected entries are overwritten below)
    posterior_odds = divide(prob_data_lae * prior_lae,
                            prob_data_oii * prior_oii)
    prob_lae_given_data = divide(prob_data_lae * prior_lae, prob_data)

    if nany(prob_lae_given_data < 0.0) or nany(isnan(prob_lae_given_data)):
        # Could be low-z OII (z<0.05) or weird source neither OII or LAE
        _logger.warning("Some sources appear to be neither LAE or OII!")

    # Not a chance it's OII
    no_oii = (prior_oii < 1e-80) | (prob_data_oii < 1e-80)
    posterior_odds[no_oii] = 1e32
    prob_lae_given_data[no_oii] = 1.0

    if not extended_output:
        return posterior_odds, prob_lae_given_data
    return (posterior_odds, prob_lae_given_data, prob_data_lae,
            prob_data_oii, prior_lae, prior_oii)
def luminosity_likelihoods(config, ras, decs, zs, fluxes, lf, lambda_,
                           flim_file, cosmo, delta_l=0.05):
    """
    Return likelihoods based off of the flux and redshift of an object.
    Also return the expected number density of objects at that redshift
    (+/- 4AA).

    Parameters
    ----------
    config : ConfigParser
        configuration object
    ras, decs, zs, fluxes : array
        Source properties. zs are true redshifts (i.e. not the inferred
        LAE ones)
    lf : line_classification.lfs_ews.luminosity_function:LuminosityFunction
        a luminosity function to integrate
    lambda_ : float
        rest wavelength of the line, used to convert zs to wavelengths
    flim_file : str
        path to a file containing the flux limits. Used to set luminosity
        function integral faint limits
    cosmo : astropy.cosmology:FLRW
        an astropy cosmology object to deal with cosmology
    delta_l : float (Optional)
        fractional range of luminosity integral (set to +/-5% based off
        Leung+ 2016)

    Returns
    -------
    prob_flux : array
        P(flux|LF) the probability that an observed flux in the range
        1 +- delta_l was drawn from a LF
    n_expected : array
        The expected number of sources in a wavelength slice +/- 4A around
        its wavelength (value to roughly match what Andrew Leung used)
        (assuming 1 steradian of sky)

    Raises
    ------
    NegativeProbException
        if any resulting probability is negative
    """
    zs_arr = array(zs)
    wl = lambda_ * (1.0 + zs_arr)
    # Has to be LAE redshift
    zlaes = wl / config.getfloat("wavelengths", "LAE") - 1.0

    # 4 pi d_L^2 in cm^2, shared by the flux -> luminosity conversions below
    sphere_area = 4.0 * pi * square(
        cosmo.luminosity_distance(zs).to('cm').value)
    Ls = sphere_area * fluxes

    # Class to integrate the luminosity function
    lf_inter = LFIntegrator(lf, flim_file)

    # 5% range from Andrew Leung's work
    lupper = (1.0 + delta_l) * Ls
    llower = (1.0 - delta_l) * Ls

    # Don't let range drop below Lmin
    llims = lf_inter.flims(zlaes) * sphere_area
    out_of_range_is = llower < llims

    # Set lower limit to flux limit and add missing range to upper limit
    if nany(out_of_range_is):
        lrange = lupper[out_of_range_is] - llower[out_of_range_is]
        lupper[out_of_range_is] = llims[out_of_range_is] + lrange
        llower[out_of_range_is] = 1.0001 * llims[out_of_range_is]

    lf_ints, ns = lf_inter.integrate_lf_limits(config, ras, decs, zs, llower,
                                               lupper, lambda_, cosmo)

    # P(flux|LF)
    prob_flux = lf_ints / ns

    # Undetectable stuff (norm left at its -99 sentinel)
    prob_flux[ns < -98] = 0.0
    ns[ns < -98] = 0.0

    if nany(prob_flux < 0.0):
        dodges_is = prob_flux < 0.0
        # logging needs a format string as its first argument
        _logger.error(
            "Negative P(flux|LF): zs=%s, L_lower=%s, L_upper=%s, "
            "LF_Integral=%s, Norm=%s",
            zs_arr[dodges_is], llower[dodges_is], lupper[dodges_is],
            lf_ints[dodges_is], ns[dodges_is])
        raise NegativeProbException("The probability here is negative!")

    # Now derive the expected number of sources in the wavelength slices
    zmins = (wl - 4.0) / lambda_ - 1.0
    zmaxes = (wl + 4.0) / lambda_ - 1.0
    vols = cosmo.comoving_volume(zmaxes).to(
        'Mpc3').value - cosmo.comoving_volume(zmins).to('Mpc3').value
    n_expected = vols * ns

    return prob_flux, n_expected
def Predicter(norm_times, norm_data, is_positive=True, trend_only=False,
              psense="low"):
    """
    Gives prediction based one day dynamic threshold for a data pair
    points provided in norm_times and norm_data.

    Parameters
    ----------
    norm_times: 1D array
        one dimensional array of timestamps with fixed step, i.e.
        timestamps of some timeseries
    norm_data: 1D array
        one dimensional array of values with fixed step, i.e. values of
        some timeseries
    is_positive: boolean
        indicates if data is non-negative or not; when True negative
        bounds are clipped to 0
    trend_only: boolean
        indicates if only trend should be predicted or not
    psense: string
        confidence level, i.e. sensitivity, possible values are "low",
        "medium", "high"

    Returns
    -------
    linear_prediction: dictionary
        "thresholds" is a numpy array containing 3 columns with
        timestamps, lower and upper bounds;
        "reliability" is the square root of the adjusted R-squared of the
        fitted linear model
    """
    day_seconds = 24.0 * 60.0 * 60.0
    last_day = daytrunc(norm_times[-1])
    delta_t = norm_times[1] - norm_times[0]
    obsi = ceil((24 * 60 * 60) / delta_t)
    pred_dates = arange((norm_times[-1] + delta_t),
                        (last_day + 2.0 * day_seconds), delta_t)
    pred_dates = pred_dates - day_seconds
    nahead = pred_dates.size

    # fit the linear model on power-transformed data
    mlambda = BCLambda(norm_data)
    linmod = TSLM(YeoJohn(norm_data, mlambda), obsi, trend_only=trend_only)
    pred_reliab = sqrt(linmod.rsquared_adj)
    x_forecast = PredictLM(linmod, h=nahead, sensitivity=psense,
                           trend_only=trend_only)
    # back-transform every forecast row (plain one-argument lambda: the
    # tuple-parameter form is a syntax error on Python 3)
    x_forecast = apply_along_axis(lambda w: YeoJohnInv(w, mlambda), 1,
                                  x_forecast)

    # The back-transformation can produce NaN; try to rebuild the outer
    # bounds (columns 0 and 4) from the remaining columns.
    if nany(isnan(x_forecast[:, 1])) or nany(isnan(x_forecast[:, 3])):
        if (sum(~isnan(x_forecast[:, 0])) > 1) and (
                sum(~isnan(x_forecast[:, 4])) > 1):
            # regress the band width on the first NaN-free inner column
            divx = x_forecast[:, 4] - x_forecast[:, 0]
            divm = array([])
            if sum(isnan(x_forecast[:, 2])) == 0:
                divm = x_forecast[:, 2]
            else:
                if sum(isnan(x_forecast[:, 1])) == 0:
                    divm = x_forecast[:, 1]
                else:
                    if sum(isnan(x_forecast[:, 3])) == 0:
                        divm = x_forecast[:, 3]
            if sum(isnan(divx)) > 0:
                divm0 = divm[~isnan(divx)]
                divx0 = divx[~isnan(divx)]
            else:
                divm0 = divm
                divx0 = divx
            div_lm = OLS(divx0, add_constant(divm0)).fit()
            divx_est = div_lm.params[0] + div_lm.params[1] * divm
            if nany(isnan(divx)):
                divx[isnan(divx)] = divx_est[isnan(divx)]
            if nany(isnan(x_forecast[:, 0])):
                naninds = where(isnan(x_forecast[:, 0]))[0]
                x_forecast[naninds, 0] = x_forecast[naninds, 4] - divx[naninds]
            if nany(isnan(x_forecast[:, 4])):
                naninds = where(isnan(x_forecast[:, 4]))[0]
                x_forecast[naninds, 4] = x_forecast[naninds, 0] + divx[naninds]
        else:
            if (sum(isnan(x_forecast[:, 0])) == sum(isnan(
                    x_forecast[:, 2]))) and (sum(isnan(x_forecast[:, 4])) ==
                                             sum(isnan(x_forecast[:, 2]))):
                div_low1 = x_forecast[:, 1]
                div_low2 = x_forecast[:, 0]
                low_ok = ~isnan(div_low2)
                div_lm_low = OLS(div_low2[low_ok],
                                 add_constant(div_low1[low_ok])).fit()
                upper_est = (x_forecast[:, 3] -
                             div_lm_low.params[0]) / div_lm_low.params[1]

                div_up1 = x_forecast[:, 4]
                div_up2 = x_forecast[:, 3]
                # Filter on the column that is actually masked; the guard
                # used to test div_low2, which could leave NaN in the fit.
                up_ok = ~isnan(div_up2)
                div_lm_up = OLS(div_up2[up_ok],
                                add_constant(div_up1[up_ok])).fit()
                lower_est = (x_forecast[:, 1] -
                             div_lm_up.params[0]) / div_lm_up.params[1]

                if nany(isnan(x_forecast[:, 4])):
                    naninds_up = where(isnan(x_forecast[:, 4]))[0]
                    x_forecast[naninds_up, 4] = upper_est[naninds_up]
                if nany(isnan(x_forecast[:, 0])):
                    naninds_low = where(isnan(x_forecast[:, 0]))[0]
                    x_forecast[naninds_low, 0] = lower_est[naninds_low]

    # keep only the outer bounds
    x_forecast = delete(x_forecast, (1, 2, 3), 1)
    if is_positive:
        if nany(x_forecast[:, 0] < 0):
            x_forecast[x_forecast[:, 0] < 0, 0] = 0.0
        if nany(x_forecast[:, 1] < 0):
            x_forecast[x_forecast[:, 1] < 0, 1] = 0.0
    x_forecast = column_stack((pred_dates, x_forecast))
    if trend_only:
        # obsi comes from ceil() and is a float; slices need an integer
        x_forecast = x_forecast[-int(obsi):, ]
    linear_prediction = {"thresholds": x_forecast, "reliability": pred_reliab}
    return linear_prediction
def BadInd(norm_times, norm_data, real_times, change_inds, xhint=False):
    """
    Checks if some days in a data can be removed to have stationary data.

    Parameters
    ----------
    norm_times: 1D array
        one dimensional array of timestamps with fixed step, i.e.
        timestamps of some timeseries
    norm_data: 1D array
        one dimensional array of values with fixed step, i.e. values of
        some timeseries
    real_times: 1D array
        one dimensional array of timestamps of raw data
    change_inds: 1D array
        indices of stationary changes
    xhint: boolean
        indicates if data is highly unstable or not

    Returns
    -------
    erase_inf: dictionary
        "indices" holds the indices in raw data to be removed (an empty
        list when nothing has to be removed, otherwise an integer array);
        "stat" holds the indices of the new stationary change points, can
        be empty
    """
    nobsi = norm_data.size
    delta_t = norm_times[2] - norm_times[1]
    obsi = ceil(24.0 * 60.0 * 60.0 / delta_t)
    # ceil() returns a float; index ranges must be built from an integer,
    # otherwise arange() yields float indices that numpy refuses to use.
    day_len = int(obsi)
    schanges = change_inds
    erase_inf = {"indices": [], "stat": schanges}

    if len(schanges) == 0:
        return erase_inf
    # change points followed by another one in less than a day
    binds = where(diff(schanges) < obsi)[0]
    if len(binds) == 0:
        return erase_inf

    erase_inds = array([], dtype=int)
    rerase_inds = array([], dtype=int)
    chmark = ones(len(binds))
    for i, bind in enumerate(binds):
        start = schanges[bind]
        # series with the suspicious stretch after the change point removed
        if (start + 1 + obsi) < (nobsi - obsi):
            chts = delete(norm_data, arange(start + 1, start + 1 + day_len))
        else:
            chts = delete(norm_data, arange(start + 1, schanges[bind + 1] - 1))
        if TestK(chts, start, obsi, 2) == 0:
            chmark[i] = TestA(chts, start, obsi) + TestMV(chts, start, obsi)
        else:
            chmark[i] = 0.2
        if (not xhint) and (chmark[i] == 0.1):
            chmark[i] = 0
        if chmark[i] < 0.06:
            # change disappears once the stretch is removed: mark it
            if (start + obsi + 1) <= nobsi:
                erase_inds = append(erase_inds,
                                    arange(start, start + day_len))
            else:
                erase_inds = append(erase_inds, arange(start, nobsi))
            # last raw timestamp not later than the change point
            remdate = real_times[max(
                where(real_times <= norm_times[start])[0])]
            rerase_inds = append(
                rerase_inds,
                where((real_times <= (remdate + 24 * 60 * 60)) &
                      (real_times > remdate))[0])
    erase_inds = unique(erase_inds)
    rerase_inds = unique(rerase_inds)

    if nany(chmark < 0.06):
        temp_inds = binds[where(chmark < 0.06)[0]]
        # drop both change points that delimit each removed stretch
        temp_inds = append(temp_inds, (temp_inds + 1))
        schanges = delete(schanges, temp_inds)
        erase_stat = intersect1d(erase_inds, schanges)
        if len(erase_stat) > 0:
            # setdiff1d is asymmetric: keeps what is only in schanges
            schanges = setdiff1d(schanges, erase_stat)
        erase_inf["indices"] = rerase_inds
        erase_inf["stat"] = schanges
    return erase_inf