def _rotcurve_kd_worker(num_samples,glong=None,velo=None, velo_err=None,rotcurve='reid14_rotcurve', rotcurve_dist_res=1.e-2, rotcurve_dist_max=30.): """ Multiprocessing worker for pdf_kd. Resamples velocity and runs rotcurve_kd. Parameters: num_samples : integer number of samples this worker should run glong : scalar or 1-D array Galactic longitude (deg). If it is an array, it must have the same size as velo. velo : scalar or 1-D array LSR velocity (km/s). If it is an array, it must have the same size as glong. velo_err : scalar or 1-D array (optional) LSR velocity uncertainty (km/s). If it is an array, it must have the same size as velo. Otherwise, this uncertainty is applied to all velos. rotcurve : string (optional) rotation curve model rotcurve_dist_res : scalar (optional) line-of-sight distance resolution when calculating kinematic distance with rotcurve_kd (kpc) rotcurve_dist_max : scalar (optional) maximum line-of-sight distance when calculating kinematic distance with rotcurve_kd (kpc) Returns: output (same as rotcurve_kd) Raises: ValueError : if glong and velo are not 1-D; or if glong and velo are arrays and not the same size """ # # check inputs # # convert scalar to array if necessary glong_inp, velo_inp = np.atleast_1d(glong, velo) # check shape of inputs if glong_inp.ndim != 1 or velo_inp.ndim != 1: raise ValueError("glong and velo must be 1-D") if glong_inp.size != velo_inp.size: raise ValueError("glong and velo must have same size") # # Re-sample velocities # if velo_err is not None: velo_resample = \ np.random.normal(loc=velo_inp,scale=velo_err, size=(num_samples,velo_inp.size)).T else: velo_resample = np.ones((num_samples,velo_inp.size)) velo_resample = (velo_resample*velo_inp).T # # Calculate kinematic distance for each l,v point # kd_out = [rotcurve_kd(np.ones(num_samples)*l,v, rotcurve=rotcurve, dist_res=rotcurve_dist_res, dist_max=rotcurve_dist_max, resample=True) for (l,v) in zip(glong_inp,velo_resample)] return kd_out
def _pdf_kd_distance_samples(kd_out, idx):
    """
    Build the combined "distance" samples for the point at index idx:
    the tangent distance where both near and far are NaN, followed by
    the near and far samples everywhere else.
    """
    near = kd_out['near'][idx]
    far = kd_out['far'][idx]
    is_tangent = np.isnan(near) & np.isnan(far)
    return np.concatenate((kd_out['tangent'][idx][is_tangent],
                           near[~is_tangent],
                           far[~is_tangent]))


def pdf_kd(glong, glat, velo, velo_err=None, rotcurve='wc21_rotcurve',
           rotcurve_dist_res=0.001, rotcurve_dist_max=30., pdf_bins=1000,
           num_samples=10000, processes=None, plot_pdf=False,
           plot_prefix='pdf_', peculiar=False, use_kriging=False, norm=20):
    """
    Return the kinematic near, far, and tangent distance and distance
    uncertainties for a given Galactic longitude and LSR velocity
    assuming a given rotation curve. Generate distance posteriors by
    resampling within rotation curve parameter and velocity
    uncertainties. Peak of posterior and 68.3% minimum width Bayesian
    credible interval (BCI) are returned.

    Parameters:
      glong, glat :: scalar or array of scalars
        Galactic longitude and latitude (deg).

      velo :: scalar or array of scalars
        LSR velocity (km/s).

      velo_err :: scalar or array of scalars (optional)
        LSR velocity uncertainty (km/s). If it is an array (or other
        sequence), it must have the same shape as velo.
        Otherwise, this scalar uncertainty is applied to all velos.

      rotcurve :: string (optional)
        rotation curve model

      rotcurve_dist_res :: scalar (optional)
        line-of-sight distance resolution when calculating kinematic
        distance with rotcurve_kd (kpc)

      rotcurve_dist_max :: scalar (optional)
        maximum line-of-sight distance when calculating kinematic
        distance with rotcurve_kd (kpc)

      pdf_bins :: integer (optional)
        number of bins used to calculate PDF

      num_samples :: integer (optional)
        Number of MC samples to use when generating PDF

      processes :: integer (optional)
        Number of simultaneous workers to use
        If None, automatically assign workers based on system's core count

      plot_pdf :: bool (optional)
        If True, plot each PDF. Filenames are:
        plot_prefix+"{glong}_{velo}.pdf".

      plot_prefix :: string (optional)
        The prefix for the plot filenames.

      peculiar :: boolean (optional)
        Only supported for "wc21_rotcurve" and "reid19_rotcurve"
        If True, include HMSFR peculiar motion component

      use_kriging :: boolean (optional)
        Only supported for rotcurve = "wc21_rotcurve"
        If True, estimate individual Upec & Vpec from kriging program
        If False, use average Upec & Vpec

      norm :: scalar (optional)
        Normalization factor that determines slope of kriging to
        average peculiar motion transition.
        Larger norm is steeper transition

    Returns: output
      output["Rgal"] :: scalar or array of scalars
        Galactocentric radius (kpc).

      output["Rtan"] :: scalar or array of scalars
        Galactocentric radius of tangent point (kpc).

      output["near"] :: scalar or array of scalars
        kinematic near distance (kpc)

      output["far"] :: scalar or array of scalars
        kinematic far distance (kpc)

      output["distance"] :: scalar or array of scalars
        kinematic distance (near and far combined) (kpc)

      output["tangent"] :: scalar or array of scalars
        kinematic tangent distance (kpc)

      output["vlsr_tangent"] :: scalar or array of scalars
        LSR velocity of tangent point (km/s)

      Each of these values is the mode of the posterior distribution.
      Also included in the dictionary for each of these parameters
      are param+"_err_neg" and param+"_err_pos", which define the
      68.3% Bayesian credible interval around the mode, and
      param+"_kde", which is the kernel density estimator fit to the
      posterior samples.

    Raises:
      ValueError : if glong and velo are not the same shape
                   if velo_err is an array and not the same shape as
                   glong and velo
    """
    #
    # check inputs
    #
    # remember whether to unwrap the results back to scalars
    input_scalar = np.isscalar(glong)
    glong, glat, velo = np.atleast_1d(glong, glat, velo)
    if glong.shape != velo.shape:
        raise ValueError("glong and velo must have same shape")
    if velo_err is not None and not np.isscalar(velo_err):
        # np.shape / np.asarray also accept lists and tuples, which
        # have no .shape attribute of their own
        if np.shape(velo_err) != velo.shape:
            raise ValueError(
                "velo_err must be scalar or have same shape as velo")
        velo_err = np.asarray(velo_err)
    #
    # Storage for final PDF kinematic distance results
    #
    results = {}
    for kdtype in ("Rgal", "Rtan", "near", "far", "distance", "tangent",
                   "vlsr_tangent"):
        results[kdtype] = np.zeros(glong.shape)
        results[kdtype + "_kde"] = np.empty(shape=glong.shape, dtype=object)
        results[kdtype + "_err_neg"] = np.zeros(glong.shape)
        results[kdtype + "_err_pos"] = np.zeros(glong.shape)
    #
    # Calculate rotcurve kinematic distance
    #
    kd_out = rotcurve_kd(glong, glat, velo, velo_err=velo_err,
                         velo_tol=0.1, rotcurve=rotcurve,
                         dist_res=rotcurve_dist_res, dist_min=0.01,
                         dist_max=rotcurve_dist_max, resample=True,
                         size=num_samples, processes=processes,
                         peculiar=peculiar, use_kriging=use_kriging,
                         norm=norm)
    #
    # Set up multiprocessing for fitting KDEs. The order of args must
    # match the order in which the results are unpacked below.
    #
    kde_specs = [("Rgal", "pyqt"), ("Rtan", "pyqt"), ("near", "pyqt"),
                 ("far", "pyqt"), ("tangent", "pyqt"),
                 ("vlsr_tangent", "scipy")]
    indices = list(np.ndindex(glong.shape))
    args = [(kd_out[kdtype][i], kdetype, 0.683, pdf_bins)
            for kdtype, kdetype in kde_specs
            for i in indices]
    #
    # Also, distance (near + far)
    # check if both are nan -> use tangent distance
    #
    kde_specs.append(("distance", "pyqt"))
    args.extend((_pdf_kd_distance_samples(kd_out, i), 'pyqt', 0.683,
                 pdf_bins)
                for i in indices)
    #
    # Get results
    #
    with mp.Pool(processes=processes) as pool:
        print("Number of pdf_kd processes:", pool._processes)
        kde_results = pool.map(calc_hpd_wrapper, args)
        print("Closing pool in pdf_kd")
        pool.close()
        pool.join()
    kde_iter = iter(kde_results)
    for kdtype, _ in kde_specs:
        for i in indices:
            # TODO: fix distance PDF to calculate HPD centred around
            # tangent distance
            kde, mode, lower, upper = next(kde_iter)
            results[kdtype][i] = mode
            results[kdtype + "_kde"][i] = kde
            results[kdtype + "_err_neg"][i] = mode - lower
            results[kdtype + "_err_pos"][i] = upper - mode
    #
    # Plot PDFs
    #
    if plot_pdf:
        plot_kdtypes = ["Rgal", "Rtan", "near", "far", "distance",
                        "tangent", "vlsr_tangent"]
        labels = [r"$R$ (kpc)", r"$R_{\rm tan}$ (kpc)",
                  r"$d_{\rm near}$ (kpc)", r"$d_{\rm far}$ (kpc)",
                  r"$d$ (kpc)", r"$d_{\rm tan}$ (kpc)",
                  r"$v_{\rm tan}$ (km s$^{-1}$)"]
        for i in indices:
            #
            # Set-up figure
            #
            fig, axes = plt.subplots(7, figsize=(8.5, 11))
            axes[0].set_title(r"PDFs for ($\ell$, $v$) = ("
                              "{0:.1f}".format(glong[i]) +
                              r"$^\circ$, "
                              "{0:.1f}".format(velo[i]) +
                              r" km s$^{-1}$)")
            #
            # Compute "traditional" kinematic distances. Use the same
            # peculiar motion settings (including norm) as the
            # resampled calculation so the reference lines are
            # consistent with the PDFs.
            #
            print("Computing traditional KD")
            rot_kd = rotcurve_kd(glong[i], glat[i], velo[i],
                                 rotcurve=rotcurve,
                                 dist_res=rotcurve_dist_res,
                                 dist_max=rotcurve_dist_max,
                                 peculiar=peculiar,
                                 use_kriging=use_kriging,
                                 norm=norm)
            for ax, kdtype, label in zip(axes, plot_kdtypes, labels):
                if kdtype == 'distance':
                    out = _pdf_kd_distance_samples(kd_out, i)
                else:
                    out = kd_out[kdtype][i]
                peak = results[kdtype][i]
                kde = results[kdtype + "_kde"][i]
                err_neg = results[kdtype + "_err_neg"][i]
                err_pos = results[kdtype + "_err_pos"][i]
                # find bad data
                out = out[~np.isnan(out)]
                # skip if kde failed (all data is bad)
                if kde is None:
                    continue
                # set-up bins
                out_min = np.min(out)
                out_max = np.max(out)
                binwidth = (out_max - out_min) / 20.
                bins = np.arange(out_min, out_max + binwidth, binwidth)
                distwidth = (out_max - out_min) / 200.
                dists = np.arange(out_min, out_max + distwidth, distwidth)
                pdf = kde(dists)
                ax.hist(out, bins=bins, density=True, facecolor='white',
                        edgecolor='black', lw=2, zorder=1)
                ax.plot(dists, pdf, 'k-', zorder=3)
                # shade the credible interval around the mode
                err_dists = np.arange(peak - err_neg, peak + err_pos,
                                      distwidth)
                err_pdf = kde(err_dists)
                ax.fill_between(err_dists, 0, err_pdf, color='gray',
                                alpha=0.5, zorder=2)
                ax.axvline(peak, linestyle='solid', color='k', zorder=3)
                if kdtype == 'distance':
                    ax.axvline(rot_kd['near'], linestyle='dashed',
                               color='k', zorder=3)
                    ax.axvline(rot_kd['far'], linestyle='dashed',
                               color='k', zorder=3)
                else:
                    ax.axvline(rot_kd[kdtype], linestyle='dashed',
                               color='k', zorder=3)
                ax.set_xlabel(label)
                ax.set_ylabel("Normalized PDF")
                ax.set_xlim(out_min, out_max)
                # turn off grid
                ax.grid(False)
            plt.tight_layout()
            fname = "{0}{1}_{2}.pdf".format(plot_prefix, glong[i], velo[i])
            plt.savefig(fname)
            plt.close(fig)
    #
    # Convert results to scalar if necessary
    #
    if input_scalar:
        for key in results:
            results[key] = results[key][0]
    return results
def _pdf_kd_run_jobs(worker, jobs, num_cpu, verbose, message):
    """
    Apply worker to every job and return the list of results, using a
    multiprocessing pool of num_cpu processes when num_cpu > 0 and a
    plain serial loop otherwise.
    """
    if num_cpu <= 0:
        return [worker(job) for job in jobs]
    if verbose:
        print(message)
    pool = mp.Pool(processes=num_cpu)
    try:
        result = pool.map_async(worker, jobs, chunksize=1)
        if verbose:
            kd_utils.pool_wait(result, len(jobs), 1)
        else:
            # wake up as soon as the work is done, but keep the wait
            # bounded so interrupts are still serviced
            while not result.ready():
                result.wait(1)
        pool.close()
        pool.join()
    finally:
        # harmless after a clean close/join; releases the worker
        # processes if waiting was interrupted
        pool.terminate()
    return result.get()


def pdf_kd(glong, velo, velo_err=None,
           rotcurve='reid14_rotcurve',
           rotcurve_dist_res=1.e-3,
           rotcurve_dist_max=30.,
           pdf_bins=100, num_samples=1000,
           num_cpu=mp.cpu_count(), chunksize=10,
           plot_pdf=False, plot_prefix='pdf_', verbose=True):
    """
    Return the kinematic near, far, and tangent distance and distance
    uncertainties for a given Galactic longitude and LSR velocity
    assuming a given rotation curve. Generate PDF of distances by
    resampling within rotation curve parameter and velocity
    uncertainties. Peak of PDF is the returned distance and width of
    PDF such that the area enclosed by the PDF is 68.2% is the
    returned distance uncertainty.

    Parameters:
      glong : scalar or 1-D array
              Galactic longitude (deg). If it is an array, it must
              have the same size as velo.
      velo : scalar or 1-D array
             LSR velocity (km/s). If it is an array, it must
             have the same size as glong.
      velo_err : scalar or 1-D (optional)
                 LSR velocity uncertainty (km/s). If it is an array,
                 it must have the same size as velo.
                 Otherwise, this uncertainty is applied to all velos.
      rotcurve : string (optional)
                 rotation curve model
      rotcurve_dist_res : scalar (optional)
                          line-of-sight distance resolution when
                          calculating kinematic distance with
                          rotcurve_kd (kpc)
      rotcurve_dist_max : scalar (optional)
                          maximum line-of-sight distance when
                          calculating kinematic distance with
                          rotcurve_kd (kpc)
      pdf_bins : integer (optional)
                 number of bins used to calculate PDF
      num_samples : integer (optional)
                    Number of MC samples to use when generating PDF
      num_cpu : integer (optional)
                Number of CPUs to use in multiprocessing.
                If 0, do not use multiprocessing.
      chunksize : integer (optional)
                  Number of samples computed by each multiprocessing
                  task.
      plot_pdf : bool (optional)
                 If True, plot each PDF. Filenames are
                 plot_prefix+"{0}glong_{1}velo.pdf".
      plot_prefix : string (optional)
                    The prefix for the plot filenames.
      verbose : bool (optional)
                If True, output status updates and total runtime

    Returns: output
      For each param in "Rgal" (Galactocentric radius, kpc),
      "Rtan" (Galactocentric radius of tangent point, kpc),
      "near" (kinematic near distance, kpc),
      "far" (kinematic far distance, kpc),
      "tangent" (kinematic tangent distance, kpc), and
      "vlsr_tangent" (LSR velocity of tangent point, km/s):
      output[param] : scalar or 1-D array
                      peak of the PDF
      output[param+"_err_neg"] : scalar or 1-D array
                      uncertainty in the negative direction
      output[param+"_err_pos"] : scalar or 1-D array
                      uncertainty in the positive direction
      output[param+"_kde"] : object or 1-D array of objects
                      kernel density estimator fit to the samples
                      (None where the fit failed)
      If glong and velo have a single element, each of these is a
      scalar. Otherwise they have shape (velo.size).

    Raises:
      ValueError : if glong and velo are not 1-D; or
                   if glong and velo are arrays and not the same size;
                   or if velo_err is an array whose size matches
                   neither 1 nor the size of velo
    """
    total_start = time.time()
    #
    # check inputs
    #
    # convert scalar to array if necessary
    glong_inp, velo_inp = np.atleast_1d(glong, velo)
    # check shape of inputs
    if glong_inp.ndim != 1 or velo_inp.ndim != 1:
        raise ValueError("glong and velo must be 1-D")
    if glong_inp.size != velo_inp.size:
        raise ValueError("glong and velo must have same size")
    # fail early (before any worker is started) on a bad velo_err
    if velo_err is not None and not np.isscalar(velo_err):
        if np.shape(velo_err) not in ((), (1,), velo_inp.shape):
            raise ValueError("velo_err must be a scalar or have the "
                             "same size as velo")
    kdtypes = ("Rgal", "Rtan", "near", "far", "tangent", "vlsr_tangent")
    kdetypes = ("pyqt", "pyqt", "pyqt", "pyqt", "pyqt", "scipy")
    #
    # Set-up multiprocesing for rotcurve re-sampling: each job is the
    # number of samples one task should generate
    #
    jobs = [chunksize] * int(num_samples // chunksize)
    remainder = num_samples % chunksize
    if remainder > 0:
        jobs.append(remainder)
    worker = partial(_rotcurve_kd_worker,
                     glong=glong_inp, velo=velo_inp,
                     velo_err=velo_err, rotcurve=rotcurve,
                     rotcurve_dist_res=rotcurve_dist_res,
                     rotcurve_dist_max=rotcurve_dist_max)
    #
    # Calculate rotcurve kinematic distance for each l,v point
    #
    result = _pdf_kd_run_jobs(
        worker, jobs, num_cpu, verbose,
        "Starting multiprocessing for rotation curve "
        "re-sampling...")
    #
    # Concatenate results from multiprocessing. The leading empty
    # array keeps the result a float array even when there are no jobs
    #
    kd_out = [
        {kdtype: np.concatenate(
            [np.array([])] + [np.ravel(r[i][kdtype]) for r in result])
         for kdtype in kdtypes}
        for i in range(glong_inp.size)]
    #
    # Storage for final PDF kinematic distance results
    #
    results = {}
    for kdtype in kdtypes:
        results[kdtype] = np.zeros(glong_inp.size)
        results[kdtype + "_kde"] = np.empty(shape=(glong_inp.size,),
                                            dtype=object)
        results[kdtype + "_err_neg"] = np.zeros(glong_inp.size)
        results[kdtype + "_err_pos"] = np.zeros(glong_inp.size)
    #
    # Set up multiprocessing for PDF kinematic distance calculation
    #
    jobs = [(out[kdtype], i, kdtype, kdetype)
            for i, out in enumerate(kd_out)
            for kdtype, kdetype in zip(kdtypes, kdetypes)]
    worker = partial(_pdf_kd_results_worker, pdf_bins=pdf_bins)
    result = _pdf_kd_run_jobs(
        worker, jobs, num_cpu, verbose,
        "Starting multiprocessing for PDF kinematic "
        "distance calculation...")
    #
    # Unpack results and save
    #
    for r in result:
        kd_out_ind, kdtype, kde, peak_dist, peak_dist_err_neg, \
          peak_dist_err_pos = r
        results[kdtype][kd_out_ind] = peak_dist
        results[kdtype + "_kde"][kd_out_ind] = kde
        results[kdtype + "_err_neg"][kd_out_ind] = peak_dist_err_neg
        results[kdtype + "_err_pos"][kd_out_ind] = peak_dist_err_pos
    #
    # Plot PDFs and results
    #
    if plot_pdf:
        plot_kdtypes = ["Rgal", "Rtan", "near", "far", "tangent"]
        labels = [r"$R$ (kpc)", r"$R_{\rm tan}$ (kpc)",
                  r"$d_{\rm near}$ (kpc)", r"$d_{\rm far}$ (kpc)",
                  r"$d_{\rm tan}$ (kpc)"]
        #
        # Loop over l,v
        #
        for i, (l, v) in enumerate(zip(glong_inp, velo_inp)):
            #
            # Set-up figure
            #
            fig, axes = plt.subplots(5, figsize=(8.5, 11))
            axes[0].set_title(r"PDFs for ($\ell$, $v$) = ("
                              "{0:.1f}".format(l) + r"$^\circ$, "
                              "{0:.1f}".format(v) + r"km s$^{-1}$)")
            #
            # Compute "traditional" kinematic distances
            #
            rot_kd = rotcurve_kd(l, v, rotcurve=rotcurve,
                                 dist_res=rotcurve_dist_res,
                                 dist_max=rotcurve_dist_max)
            for ax, kdtype, label in zip(axes, plot_kdtypes, labels):
                peak = results[kdtype][i]
                kde = results[kdtype + "_kde"][i]
                err_neg = results[kdtype + "_err_neg"][i]
                err_pos = results[kdtype + "_err_pos"][i]
                # find bad data
                out = kd_out[i][kdtype]
                out = out[~np.isnan(out)]
                # skip if kde failed (all data is bad)
                if kde is None:
                    continue
                # set-up bins
                out_min = np.min(out)
                out_max = np.max(out)
                binwidth = (out_max - out_min) / 20.
                bins = np.arange(out_min, out_max + binwidth, binwidth)
                distwidth = (out_max - out_min) / 200.
                dists = np.arange(out_min, out_max + distwidth, distwidth)
                pdf = kde(dists)
                # "density" replaces the "normed" keyword, which was
                # removed from matplotlib
                ax.hist(out, bins=bins, density=True,
                        facecolor='white', edgecolor='black', lw=2,
                        zorder=1)
                ax.plot(dists, pdf, 'k-', zorder=3)
                # shade the uncertainty interval around the peak
                err_dists = np.arange(peak - err_neg, peak + err_pos,
                                      distwidth)
                err_pdf = kde(err_dists)
                ax.fill_between(err_dists, 0, err_pdf, color='gray',
                                alpha=0.5, zorder=2)
                ax.axvline(peak, linestyle='solid',
                           color='k', zorder=3)
                ax.axvline(rot_kd[kdtype], linestyle='dashed',
                           color='k', zorder=3)
                ax.set_xlabel(label)
                ax.set_ylabel("Normalized PDF")
                ax.set_xlim(out_min, out_max)
                # turn off grid
                ax.grid(False)
            plt.tight_layout()
            plt.savefig(plot_prefix + "{0}glong_{1}velo.pdf".format(l, v))
            plt.close(fig)
    #
    # Convert results to scalar if necessary
    #
    if glong_inp.size == 1:
        for key in results:
            results[key] = results[key][0]
    total_end = time.time()
    if verbose:
        run_time = total_end - total_start
        time_h = int(run_time / 3600.)
        time_m = int((run_time - 3600. * time_h) / 60.)
        time_s = int(run_time - 3600. * time_h - 60. * time_m)
        print("Total Runtime: {0:02}h {1:02}m {2:02}s".
              format(time_h, time_m, time_s))
    return results