# Weight each grid point by a quasi-equal-area latitude factor before PCA
# (compensates for the denser grid near the poles).
d *= gf.qea_latitude_weights()
# Keep only the leading NUM_EIGVALS PCA eigenvalues of the (weighted) data.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]
print("[%s] Data analysis DONE." % (str(datetime.now())))

# <markdowncell>

# **Show the variance of the data (filtered)**

# <markdowncell>

# **Show a plot of the model orders**

# <codecell>

# Render a map of the AR model order fitted at each grid point of the
# surrogate field.  NOTE(review): the return value is bound to `plt`, which
# shadows the usual matplotlib alias — presumably render_component_single
# returns the pyplot module or a figure; confirm against its definition.
mo = sgf.model_orders()
plt = render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates')

# <codecell>

# Worker pool for parallel surrogate generation.
pool = Pool(POOL_SIZE)
# Timestamped log file; opened here and (presumably) closed at script end —
# TODO confirm it is closed or use a context manager in the full script.
log = open('geodata_estimate_component_count-%s.log' % datetime.now().strftime('%Y%m%d-%H%M'), 'w')

# storage for three types of surrogates
# (one row of NUM_EIGVALS eigenvalues per surrogate realization)
slam_ar = np.zeros((NUM_SURR, NUM_EIGVALS))
slam_w1 = np.zeros((NUM_SURR, NUM_EIGVALS))
slam_f = np.zeros((NUM_SURR, NUM_EIGVALS))

# Counter of surrogate realizations processed so far.
surr_completed = 0

# construct the job queue
# Generate NUM_SURR surrogates in parallel, collect their covariance
# eigenvalue spectra, persist the results, and plot the data spectrum
# against the surrogate distribution.
#
# we can duplicate the list here without worry as it will be copied into new python processes
# thus creating separate copies of sd
print("Running parallel generation of surrogates and SVD")
slam_list = pool.map(compute_surrogate_cov_eigvals, [(sd, Ud)] * NUM_SURR)

# Rearrange the per-surrogate (eigvals, max-component) pairs into the
# pre-allocated numpy arrays, one row per surrogate.
for i, (slam_row, maxU_row) in enumerate(slam_list):
    slam[i, :] = slam_row
    maxU[i, :] = maxU_row

# Sort each component column across surrogates so rows become empirical
# quantiles (row k is the k-th order statistic of that component).
maxU.sort(axis = 0)

print("Saving computed spectra ...")

# Save the results to file.  BUGFIX: pickle streams are binary — the file
# must be opened in 'wb' mode ('w' raises TypeError on Python 3 and produces
# corrupt output on Windows under Python 2).
with open('data/slp_eigvals_surrogates.bin', 'wb') as f:
    cPickle.dump([dlam, slam, sd.model_orders(), sd.lons, sd.lats], f)

# Figure 1: data eigenvalue spectrum vs. surrogate mean +/- 3 std.
plt.figure()
plt.plot(np.arange(NUM_EIGS) + 1, dlam, 'ro-')
plt.errorbar(np.arange(NUM_EIGS) + 1, np.mean(slam, axis = 0), np.std(slam, axis = 0) * 3, fmt = 'g-')

# Figure 2: distribution of the max component loadings across surrogates.
plt.figure()
plt.errorbar(np.arange(NUM_EIGS) + 1, np.mean(maxU, axis = 0), np.std(maxU, axis = 0) * 3, fmt = 'g-')
plt.plot(np.arange(NUM_EIGS) + 1, np.amax(maxU, axis = 0), 'r-')
plt.plot(np.arange(NUM_EIGS) + 1, np.amin(maxU, axis = 0), 'r-')
# Row 94 of the column-sorted array — presumably the ~95% empirical quantile
# for NUM_SURR = 100; TODO confirm and derive from NUM_SURR instead of
# hard-coding the index.
plt.plot(np.arange(NUM_EIGS) + 1, maxU[94, :], 'bo-', linewidth = 2)
# Largest absolute loading of each data component for comparison.
plt.plot(np.arange(NUM_EIGS) + 1, np.amax(np.abs(Ud), axis = 0), 'kx-', linewidth = 2)
plt.show()
print("DONE.")
# Weight each grid point by a quasi-equal-area latitude factor before PCA
# (compensates for the denser grid near the poles).
d *= gf.qea_latitude_weights()
# Keep only the leading NUM_EIGVALS PCA eigenvalues of the (weighted) data.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]
print("[%s] Data analysis DONE." % (str(datetime.now())))

# <markdowncell>

# **Show the variance of the data (filtered)**

# <markdowncell>

# **Show a plot of the model orders**

# <codecell>

# Render a map of the AR model order fitted at each grid point of the
# surrogate field.  NOTE(review): the return value is bound to `plt`,
# shadowing the usual matplotlib alias — confirm what
# render_component_single actually returns.
mo = sgf.model_orders()
plt = render_component_single(mo, gf.lats, gf.lons, plt_name='Model orders of AR surrogates')

# <codecell>

# Worker pool for parallel surrogate generation.
pool = Pool(POOL_SIZE)
# Timestamped log file — TODO confirm it is closed later in the script.
log = open(
    'geodata_estimate_component_count-%s.log' % datetime.now().strftime('%Y%m%d-%H%M'),
    'w')

# storage for three types of surrogates
# (one row of NUM_EIGVALS eigenvalues per surrogate realization)
slam_ar = np.zeros((NUM_SURR, NUM_EIGVALS))
slam_w1 = np.zeros((NUM_SURR, NUM_EIGVALS))
# Optionally replace the real data with the surrogate field so the pipeline
# can be validated against data that should contain no true components.
if USE_SURROGATE_MODEL:
    # HACK to replace original data with surrogates
    gf.d = sgf.sd.copy()
    sgf.d = sgf.sd.copy()
    log("** WARNING ** Replaced synth model with surrogate model to check false positives.")

# analyze data & obtain eigvals and surrogates
# NOTE(review): here `log` is called as a function — in the sibling script
# `log` is an open file handle; presumably this chunk defines a log() helper
# elsewhere in the file — confirm.
log("Computing eigenvalues of dataset ...")
d = gf.data()
if COSINE_REWEIGHTING:
    # Quasi-equal-area latitude weighting before PCA (compensates for the
    # denser grid near the poles).
    d *= gf.qea_latitude_weights()
# Keep only the leading NUM_EIGVALS PCA eigenvalues of the (weighted) data.
dlam = pca_eigvals_gf(d)[:NUM_EIGVALS]

log("Rendering orders of fitted AR models.")
# Map of the AR model order fitted at each grid point, saved to a PNG.
mo = sgf.model_orders()
render_component_single(mo, gf.lats, gf.lons, plt_name = 'Model orders of AR surrogates',
                        fname='%s_ar_model_order%s.png' % (DATA_NAME, SUFFIX))

# construct the job queue
jobq = Queue()
resq = Queue()
# One token per surrogate to compute ...
for i in range(NUM_SURR):
    jobq.put(1)
# ... plus one None sentinel per worker so each worker knows when to stop.
for i in range(WORKER_COUNT):
    jobq.put(None)

log("Starting workers")
# Worker processes share the surrogate field and the job/result queues.
workers = [Process(target = compute_surrogate_cov_eigvals, args = (sgf,jobq,resq))
           for i in range(WORKER_COUNT)]