def differenceOfmeans(humanMean=4.5, sampleSize=50, variance=0.2):
    """Simulate bot ratings and return their offsets from the mean human rating.

    Builds three truncated-normal nodes, all bounded to [1, 10] and sharing
    one precision: ``mu`` centred on ``humanMean``, and ``botOutput`` and
    ``humanOutput`` centred on ``mu``. The nodes are sampled with MCMC and
    the resulting traces are compared.

    Args:
        humanMean: Centre of the prior on the hypothetical ground-truth rating.
        sampleSize: Number of MCMC iterations to draw (burn-in 0, thinning 1).
        variance: Variance shared by all three nodes; converted to precision.

    Returns:
        The ``botOutput`` trace minus the mean of the ``humanOutput`` trace.
    """
    # Note that tau is not sigma: sigma^2 = 1 / tau.
    # The float literal keeps an integer ``variance`` from being floor-divided
    # (e.g. 1 / 2 == 0 under Python 2), which would yield a zero precision.
    t = 1.0 / variance

    # What is the probability that an analyst would give this image the same
    # rating?
    mu = TruncatedNormal('mu', mu=humanMean, tau=t, a=1, b=10)  # hypothetical ground truth
    botOutput = TruncatedNormal('botOutput', mu=mu, tau=t, a=1, b=10)
    humanOutput = TruncatedNormal('humanOutput', mu=mu, tau=t, a=1, b=10)

    # When we have data from the model we can use it here, like this:
    # d = pymc.Binomial('d', n=n, p=theta, value=np.array([0.,1.,3.,5.]), observed=True)
    sim = MCMC([mu, botOutput, humanOutput])
    sim.sample(sampleSize, 0, 1)

    # From here on the two names hold the sampled traces, not the model nodes.
    botOutput = sim.trace("botOutput")[:]
    # If humans only give ratings at the 0.5 interval, not smaller:
    # humanOutput = round_to_half(sim.trace("humanOutput")[:])
    humanOutput = sim.trace("humanOutput")[:]

    # Difference of the means -- but what we care about is the mean of the
    # human output for each image, so every bot sample is compared against
    # that single mean.
    difference = botOutput - humanOutput.mean()
    return difference
def night_missing_model():
    """Build the observed night wind-speed node with missing values masked.

    Reads ``night_mean``, ``night_tau`` and ``full_data_night_wind_speed``
    from the enclosing scope, masks the data with ``np.ma.masked_values``
    and wraps it in an observed ``TruncatedNormal`` named ``'nws'`` bounded
    to [0, 22.0]. The sum of the mask is printed as a side effect.

    Returns:
        dict: ``locals()`` of this function, i.e. every local name below
        mapped to its value.
    """
    # NOTE: the function returns locals(), so every local name here is part of
    # the returned mapping -- do not rename or add locals casually.
    # Mean
    mu = night_mean
    # Tau
    tau = night_tau
    # first cutoff
    cutoff_a = 0
    # second cutoff
    cutoff_b = 22.0
    masked_values = np.ma.masked_values(full_data_night_wind_speed, value=None)
    # Single-argument parenthesised form prints identically under Python 2 and
    # is also valid Python 3.
    print(masked_values.mask.sum())
    # Bounds are passed by keyword, matching day_missing_model. The local is
    # still called ``wind_speed_day`` because that key is exposed via locals().
    wind_speed_day = TruncatedNormal('nws', mu, tau, a=cutoff_a, b=cutoff_b,
                                     value=masked_values, observed=True)
    return locals()
def day_missing_model():
    """Build the observed day wind-speed node with missing values masked.

    Reads ``day_mean``, ``day_tau`` and ``full_data_day_wind_speed`` from the
    enclosing scope, masks the data with ``np.ma.masked_values`` and wraps it
    in an observed ``TruncatedNormal`` named ``'dws'`` bounded to [0, 27.0].
    Both cutoffs, the sum of the mask and the maximum of the underlying data
    are printed as side effects.

    Returns:
        dict: ``locals()`` of this function, i.e. every local name below
        mapped to its value.
    """
    # NOTE: the function returns locals(), so every local name here is part of
    # the returned mapping -- do not rename or add locals casually.
    # Mean
    mu = day_mean
    # Tau
    tau = day_tau
    # first cutoff
    cutoff_a = 0
    # Single-argument parenthesised prints behave identically under Python 2
    # and are also valid Python 3.
    print(cutoff_a)
    # second cutoff
    cutoff_b = 27.0
    print(cutoff_b)
    masked_values = np.ma.masked_values(full_data_day_wind_speed, value=None)
    print(masked_values.mask.sum())
    # ``.data`` is the underlying array, so masked entries are included here.
    print(masked_values.data.max())
    wind_speed_day = TruncatedNormal('dws', mu, tau, a=cutoff_a, b=cutoff_b,
                                     value=masked_values, observed=True)
    return locals()
# Per-source count arrays. ``None`` entries are masked out so they are treated
# as missing rather than as zero counts.
panstarrs_array = np.ma.masked_equal(
    np.array([None, None, None, None, 0, 0, 1, 4, 1, 2, 0, 2, 2]),
    value=None)

# neowise only on occasionally
neowise_array = np.ma.masked_equal(
    np.array([None, None, None, None, 3, 4, 0, None, None, None, None, None, 2]),
    value=None)

other_array = np.array([0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 1, 0])

# Alternative exponential priors, kept for reference:
#linear_mean = Exponential('linear_mean', beta=1., value=1./np.average(linear_array))
#spacewatch_mean = Exponential('spacewatch_mean', beta=1., value=1./np.average(spacewatch_array))
#catalina_mean = Exponential('catalina_mean', beta=1., value=1./np.average(catalina_array))
#panstarrs_mean = Exponential('panstarrs_mean', beta=1., value=1.25)
#other_mean = Exponential('other_mean', beta=1., value=1./np.average(other_array))

# Truncated-normal priors on each source's rate, bounded to [0, 10] with unit
# precision; each is centred on and initialised at ``mu``.
# produces basically the same result as the exponential priors
mu = np.average(linear_array)
linear_mean = TruncatedNormal('linear_mean', mu=mu, tau=1., a=0, b=10,
                              value=mu)

mu = np.average(spacewatch_array)
spacewatch_mean = TruncatedNormal('spacewatch_mean', mu=mu, tau=1., a=0, b=10,
                                  value=mu)

mu = np.average(catalina_array)
catalina_mean = TruncatedNormal('catalina_mean', mu=mu, tau=1., a=0, b=10,
                                value=mu)

# The two masked arrays get hard-coded centres instead of np.average(...).
mu = 1.25
panstarrs_mean = TruncatedNormal('panstarrs_mean', mu=mu, tau=1., a=0, b=10,
                                 value=mu)

mu = 2.25
neowise_mean = TruncatedNormal('neowise_mean', mu=mu, tau=1., a=0, b=10,
                               value=mu)

mu = np.average(other_array)
other_mean = TruncatedNormal('other_mean', mu=mu, tau=1., a=0, b=10,
                             value=mu)

# Observed Poisson likelihoods tied to the priors above.
linear_hits = Poisson('linear_hits', mu=linear_mean,
                      value=linear_array, observed=True)
spacewatch_hits = Poisson('spacewatch_hits', mu=spacewatch_mean,
                          value=spacewatch_array, observed=True)
def CreateFullDetectorModel(detector, waveforms, startGuess, b_over_a0, c0, d0, rc0):
    """Build the PyMC nodes for a joint fit of detector and waveform parameters.

    Detector-wide priors are created once: ``temp`` (TruncatedNormal bounded to
    [40, 120]), ``grad`` / ``pcRad`` / ``pcLen`` (Uniform between the first and
    last entries of the detector's corresponding lists) and ``b_over_a``,
    ``c``, ``d``, ``rc`` (Normal, centred on the supplied starting values).
    For every waveform a set of per-waveform priors is created, plus a
    ``Deterministic`` node that evaluates ``siggen_model`` and an observed
    ``Normal`` node whose value is ``wf.windowedWf``.

    Args:
        detector: Object providing ``gradList``, ``pcRadList``, ``pcLenList``,
            ``detector_radius``, ``detector_length``, ``pcRad``, ``pcLen``,
            ``impurityGrad`` and the methods ``IsInDetector``,
            ``SetTransferFunction``, ``SetTemperature``, ``SetFields`` and
            ``MakeSimWaveform``.
        waveforms: Sequence of objects with ``windowedWf``, ``wfLength`` and
            ``baselineRMS`` attributes.
        startGuess: Mapping with scalar entries ``'temp'``, ``'grad'``,
            ``'pcRad'``, ``'pcLen'`` and per-waveform indexable entries
            ``'radEst'``, ``'zEst'``, ``'phiEst'``, ``'wfScale'``,
            ``'switchpoint'`` and ``'smooth'``.
        b_over_a0: Centre and initial value of the ``b_over_a`` prior.
        c0: Centre and initial value of the ``c`` prior.
        d0: Centre and initial value of the ``d`` prior.
        rc0: Centre and initial value of the ``rc`` prior.

    Returns:
        dict: ``locals()`` of this function -- every local name (arguments,
        priors, arrays, ``siggen_model`` and the loop variables) mapped to
        its value.
    """
    # NOTE: the function returns locals(), so every local name below is part
    # of the returned mapping.
    n_waveforms = len(waveforms)
    # Not used again in this function; still exposed through locals().
    sample_length = len(waveforms[0].windowedWf)

    #detector-wide params
    # sigToTau is defined elsewhere -- presumably converts a standard
    # deviation to a precision; confirm against its definition.
    tempEst = TruncatedNormal('temp', mu=startGuess['temp'], tau=sigToTau(2.), value=startGuess['temp'], a=40, b=120)
    grad = Uniform('grad', lower=detector.gradList[0], upper=detector.gradList[-1], value=startGuess['grad'])
    pcRad = Uniform('pcRad', lower=detector.pcRadList[0], upper=detector.pcRadList[-1], value=startGuess['pcRad'])
    pcLen = Uniform('pcLen', lower=detector.pcLenList[0], upper=detector.pcLenList[-1], value=startGuess['pcLen'])

    # Alternative truncated-normal priors for the same three parameters:
    # grad = TruncatedNormal('grad', a=detector.gradList[0], b=detector.gradList[-1], value=startGuess['grad'], mu=startGuess['grad'],tau=sigToTau(0.03) )
    # pcRad = TruncatedNormal('pcRad', a=detector.pcRadList[0], b=detector.pcRadList[-1],value=startGuess['pcRad'], mu=startGuess['pcRad'],tau=sigToTau(0.2) )
    # pcLen = TruncatedNormal('pcLen', a=detector.pcLenList[0], b=detector.pcLenList[-1], value=startGuess['pcLen'], mu=startGuess['pcLen'],tau=sigToTau(0.2) )

    # These four are handed to detector.SetTransferFunction inside siggen_model.
    b_over_a = Normal('b_over_a', mu=b_over_a0, tau=sigToTau(.5), value=b_over_a0)
    c = Normal('c', mu=c0, tau=sigToTau(0.2), value=c0)
    d = Normal('d', mu=d0, tau=sigToTau(0.2), value=d0)
    rc = Normal('rc', mu=rc0, tau=sigToTau(5), value=rc0)

    #Make an array of priors for each waveform-specific parameter
    # Object arrays so that each slot can hold one PyMC node.
    radiusArray = np.empty(n_waveforms, dtype=object)
    zArray = np.empty(n_waveforms, dtype=object)
    phiArray = np.empty(n_waveforms, dtype=object)
    scaleArray = np.empty(n_waveforms, dtype=object)
    t0Array = np.empty(n_waveforms, dtype=object)
    sigArray = np.empty(n_waveforms, dtype=object)

    for idx in range(n_waveforms):
        # NOTE(review): no ``tau`` is passed to the two TruncatedNormal calls
        # below, unlike every other TruncatedNormal in this function --
        # confirm that this is intended and accepted by the PyMC version used.
        radiusArray[idx] = (TruncatedNormal('radEst_%d' % idx, mu=3, a=0, b=detector.detector_radius, value=startGuess['radEst'][idx]))
        zArray[idx] = (TruncatedNormal('zEst_%d' % idx, mu=3, a=0, b=detector.detector_length, value=startGuess['zEst'][idx]))
        phiArray[idx] = (Uniform('phiEst_%d' % idx, lower=0, upper=np.pi / 4, value=startGuess['phiEst'][idx]))
        # The scale prior's width argument is 1% of the starting scale.
        scaleArray[idx] = (Normal('wfScale_%d' % idx, mu=startGuess['wfScale'][idx], tau=sigToTau(0.01 * startGuess['wfScale'][idx]), value=startGuess['wfScale'][idx]))
        t0Array[idx] = (Normal('switchpoint_%d' % idx, mu=startGuess['switchpoint'][idx], tau=sigToTau(5.), value=startGuess['switchpoint'][idx]))
        sigArray[idx] = (Normal('sigma_%d' % idx, mu=startGuess['smooth'][idx], tau=sigToTau(3), value=startGuess['smooth'][idx]))

    #This is a deterministic (implicitly? is this a problem?)
    def siggen_model(s, rad, phi, z, e, smooth, temp, b_over_a, c, d, rc, grad, pc_rad, pc_len, fit_length):
        # Simulate one waveform for the given parameters. Any out-of-range
        # parameter, or a failed simulation, yields an array of -inf of
        # length ``fit_length`` instead of a waveform.
        if s < 0 or s >= fit_length:
            return np.ones(fit_length) * -np.inf
        # if smooth<0:
        #     return np.ones(fit_length)*-np.inf
        if not detector.IsInDetector(rad, phi, z):
            return -np.inf * np.ones(fit_length)
        # Same bounds as the 'temp' prior above.
        if temp < 40 or temp > 120:
            return np.ones(fit_length) * -np.inf
        if (grad > detector.gradList[-1]) or (grad < detector.gradList[0]):
            return np.ones(fit_length) * -np.inf
        if (pc_rad > detector.pcRadList[-1]) or (pc_rad < detector.pcRadList[0]):
            return np.ones(fit_length) * -np.inf
        if (pc_len > detector.pcLenList[-1]) or (pc_len < detector.pcLenList[0]):
            return np.ones(fit_length) * -np.inf

        # The shared ``detector`` object is mutated on every evaluation.
        detector.SetTransferFunction(b_over_a, c, d, rc)
        detector.SetTemperature(temp)
        # SetFields is only called when one of the three field parameters
        # differs from what the detector currently holds.
        if detector.pcRad != pc_rad or detector.pcLen != pc_len or detector.impurityGrad != grad:
            detector.SetFields(pc_rad, pc_len, grad)

        # NOTE(review): ``smooth`` is accepted as a parent but h_smoothing is
        # hard-coded to None here -- confirm that this is intended.
        siggen_wf = detector.MakeSimWaveform(rad, phi, z, e, s, fit_length, h_smoothing=None)
        if siggen_wf is None:
            return np.ones(fit_length) * -np.inf

        # Interactive debugging aid, disabled:
        # plt.ion()
        # plt.figure(14)
        # plt.clf()
        # plt.plot(siggen_wf)
        # for (i, wf) in enumerate(waveforms):
        #     plt.plot(wf.windowedWf, color="r")
        # print "Detector parameters: "
        # print " temp = %0.3f" % temp
        # print " zero_1 = %f" % zero_1
        # print " pole_1 = %f" % pole_1
        # print " pole_real = %f" % pole_real
        # print " pole_imag = %f" % pole_imag
        # print " grad = %0.3f" % grad
        # print " pc_rad = %0.3f" % pc_rad
        # print " pc_len = %0.3f" % pc_len
        #
        # print "Waveform parameters: "
        # print " (r,phi,z) = (%0.2f,%0.3f,%0.2f)" % (rad,phi,z)
        # print " e = %0.3f" % e
        # print " smooth = %0.3f" % smooth
        # print " t0 = %0.3f" % s
        # value = raw_input(' --> Press q to quit, any other key to continue\n')
        # plt.ioff()
        return siggen_wf

    baseline_observed = np.empty(n_waveforms, dtype=object)
    baseline_sim = np.empty(n_waveforms, dtype=object)

    for (i, wf) in enumerate(waveforms):
        # The parent keys must match siggen_model's parameter names. The
        # detector-wide nodes are shared by every waveform's Deterministic.
        baseline_sim[i] = Deterministic(eval=siggen_model, doc='siggen wf %d' % i, name='siggen_model_%d' % i, parents={
            's': t0Array[i],
            'rad': radiusArray[i],
            'phi': phiArray[i],
            'z': zArray[i],
            'e': scaleArray[i],
            'smooth': sigArray[i],
            'temp': tempEst,
            'b_over_a': b_over_a,
            'c': c,
            'd': d,
            'rc': rc,
            'grad': grad,
            'pc_rad': pcRad,
            'pc_len': pcLen,
            'fit_length': wf.wfLength
        }, trace=False, plot=False)
        # Observed node: the windowed waveform, centred on the simulated one.
        baseline_observed[i] = Normal("baseline_observed_%d" % i, mu=baseline_sim[i], tau=sigToTau(wf.baselineRMS), observed=True, value=wf.windowedWf)

    return locals()
def createWaveformModel(detector, waveform, startGuess):
    """Build the PyMC nodes for fitting a single waveform.

    The position is parameterised by a distance ``r`` and an angle ``theta``,
    which ``siggen_model`` converts to ``rad = r*cos(theta)`` and
    ``z = r*sin(theta)`` before asking the detector for a simulated waveform.
    The observed node is a ``Normal`` around that simulation whose value is
    ``waveform.windowedWf``.

    Args:
        detector: Object providing ``detector_radius``, ``detector_length``,
            ``IsInDetector`` and ``MakeSimWaveform``.
        waveform: Object with ``wfLength``, ``baselineRMS`` and
            ``windowedWf`` attributes.
        startGuess: Mapping with entries ``'radEst'``, ``'thetaEst'``,
            ``'phiEst'``, ``'wfScale'``, ``'switchpoint'`` and ``'smooth'``.

    Returns:
        dict: ``locals()`` of this function -- every local name mapped to
        its value.
    """
    # NOTE: locals() is returned, so the set of outer local names is part of
    # the interface and is kept exactly as it was.
    # Upper bound for r: distance from the origin to the (radius, length) corner.
    furthest_point = np.sqrt(detector.detector_radius**2 + detector.detector_length**2)

    radEst = TruncatedNormal(
        'radEst',
        a=0, b=furthest_point,
        mu=startGuess['radEst'], tau=sigToTau(2),
        value=startGuess['radEst'])
    thetaEst = TruncatedNormal(
        'thetaEst',
        a=0, b=np.pi / 2,
        mu=startGuess['thetaEst'], tau=sigToTau(0.2),
        value=startGuess['thetaEst'])
    phiEst = Uniform(
        'phiEst',
        lower=0, upper=np.pi / 4,
        value=startGuess['phiEst'])
    scaleEst = Normal(
        'wfScale',
        mu=startGuess['wfScale'],
        tau=sigToTau(0.01 * startGuess['wfScale']),
        value=startGuess['wfScale'])
    t0Est = Normal(
        'switchpoint',
        mu=startGuess['switchpoint'], tau=sigToTau(5.),
        value=startGuess['switchpoint'])
    sigEst = Normal(
        'sigma',
        mu=startGuess['smooth'], tau=sigToTau(3),
        value=startGuess['smooth'])

    fit_length = waveform.wfLength

    def siggen_model(s, r, theta, phi, e, smooth):
        # Returns the simulated waveform, or an all -inf array of length
        # ``fit_length`` when the parameters are unusable.
        if s < 0 or s >= fit_length or smooth < 0:
            return np.ones(fit_length) * -np.inf

        radial = r * np.cos(theta)
        axial = r * np.sin(theta)
        if detector.IsInDetector(radial, phi, axial):
            simulated = detector.MakeSimWaveform(radial, phi, axial, e, s,
                                                 fit_length,
                                                 h_smoothing=smooth)
            if simulated is not None:
                # Interactive debugging aid, disabled:
                # plt.ion()
                # plt.figure(14)
                # plt.clf()
                # plt.plot(siggen_wf)
                # plt.plot(waveform.windowedWf, color="r")
                #
                # print "Waveform parameters: "
                # print " (r,phi,z) = (%0.2f,%0.3f,%0.2f)" % (rad,phi,z)
                # print " e = %0.3f" % e
                # print " smooth = %0.3f" % smooth
                # print " t0 = %0.3f" % s
                # value = raw_input(' --> Press q to quit, any other key to continue\n')
                # plt.ioff()
                return simulated

        # Outside the detector, or the simulation produced nothing.
        return np.ones(fit_length) * -np.inf

    # Parent keys must match siggen_model's parameter names.
    baseline_sim = Deterministic(
        eval=siggen_model,
        name='siggen_model',
        doc='siggen wf',
        parents=dict(s=t0Est, r=radEst, theta=thetaEst, phi=phiEst,
                     e=scaleEst, smooth=sigEst),
        trace=False,
        plot=False)
    baseline_observed = Normal(
        'baseline_observed',
        mu=baseline_sim,
        tau=sigToTau(waveform.baselineRMS * 0.5773),
        value=waveform.windowedWf,
        observed=True)
    return locals()
#note that tau is not sigm! #sigma^2=1/tau taus = 1 / variances df = pd.DataFrame(columns=[ "variance", "sampleSize", "botMean", "humanMean", "bias", "lowerHDI", "upperHDI", "probabilityInROPE" ]) #what is the probability that an analyst would give this image the same rating? i = 1 for t in taus: # the bot now varies mu = TruncatedNormal('mu', mu=4.5, tau=t, a=1, b=10) #hypothetical ground truth botOutput = TruncatedNormal('botOutput', mu=mu, tau=t, a=1, b=10) humanOutput = TruncatedNormal('humanOutput', mu=mu, tau=t, a=1, b=10) #when we have data from the model we can use this here #like this d = pymc.Binomial(‘d’, n=n, p=theta, value=np.array([0.,1.,3.,5.]), observed=True) sim = MCMC([mu, botOutput, humanOutput]) for s in sampleSize: #get s number of samples, no burn in for this- not optimizing anything print 'sample size: {} | variance: {} '.format(s, t) sim.sample(s, 0, 1) #assume no bias at this point but we can add bias later b = 0 #computer is now varying botOutput = sim.trace("botOutput")[:]
# Fragment: samples a truncated-normal model without observed data and plots
# histograms of the traces. ``np`` is imported outside the visible code.
import matplotlib.pyplot as plt
import matplotlib
# Fixed seed so that repeated runs draw the same samples.
np.random.seed(123)
#NIIRS has a scale of 0-8
# NOTE(review): the comment above says 0-8 but ``upper`` is 10 -- confirm
# which bound is intended.
lower, upper = 0, 10
mu, sigma = 4.5, 1.5
#use a truncated normal random variable
###this normalizes our bounds- but i don't think we want this###
#[a,b] =(lower - mu) / sigma, (upper - mu) / sigma
###this may not be what we actually want to use as upper and lower
from pymc import TruncatedNormal, HalfNormal, Normal, Model, MCMC, Metropolis, Uniform
# NOTE(review): ``sigma`` is passed directly as ``tau`` here; if tau is a
# precision (1 / sigma^2) this is not a standard deviation of 1.5 -- confirm.
mu_dist = TruncatedNormal('mu_dist', mu=mu, tau=sigma, a=lower, b=upper)
# NOTE(review): no ``tau`` is given for this node -- confirm that the PyMC
# version in use accepts that.
sigma_dist = TruncatedNormal('sigma_dist', mu=0.2, a=0, b=10) #use a half-normal since sd is always positive, had sd=1, maybe for pymc3
# ``sigma_dist`` is used as the precision (tau) of Y_obs despite its name.
Y_obs= TruncatedNormal('Y_obs', mu=mu_dist, tau=sigma_dist, a=lower, b=upper) #, observed=True) this was giving error- must have an initial value if observed=True
sim=MCMC([mu_dist, sigma_dist, Y_obs])
# 50000 iterations, the first 10000 discarded as burn-in, no thinning.
sim.sample(50000, 10000, 1)
y_samples = sim.trace("Y_obs")[:]
# Histogram of the sampled Y_obs values.
fig = plt.figure(figsize=(5,5))
axes = fig.add_subplot(111)
axes.hist(y_samples, bins=50, normed=True, color="blue");
fig.show()
mu_samples = sim.trace("mu_dist")[:]
fig = plt.figure(figsize=(5,5))