def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True,
       X2=None, Y2=None, Z2=None, weights2=None, verbose=False, boxsize=0.0,
       output_ravg=False, xbin_refine_factor=2, ybin_refine_factor=2,
       zbin_refine_factor=1, max_cells_per_dim=100,
       c_api_timer=False, isa=r'fastest', weight_type=None):
    """
    Calculate the 3-D pair-counts corresponding to the real-space correlation
    function, :math:`\\xi(r)`.

    If ``weights`` are provided, the resulting pair counts are weighted.  The
    weighting scheme depends on ``weight_type``.

    .. note:: This module only returns pair counts and not the actual
       correlation function :math:`\\xi(r)`. See
       :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` for computing
       :math:`\\xi(r)` from the pair counts returned.

    Parameters
    -----------

    autocorr: boolean, required
        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
        then the second set of particle positions are not required.

    nthreads: integer
        The number of OpenMP threads to use. Has no effect if OpenMP was not
        enabled during library compilation.

    binfile: string or a list/array of floats
        For string input: filename specifying the ``r`` bins for
        ``DD``. The file should contain white-space separated values
        of (rmin, rmax)  for each ``r`` wanted. The bins need to be
        contiguous and sorted in increasing order (smallest bins come first).

        For array-like input: A sequence of ``r`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    X1/Y1/Z1: array_like, real (float/double)
        The array of X/Y/Z positions for the first set of points.
        Calculations are done in the precision of the supplied arrays.

    weights1: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or
        (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field.  If only one of
        weights1 and weights2 is specified, the other will be set to uniform
        weights.

    periodic: boolean
        Boolean flag to indicate periodic boundary conditions.

    X2/Y2/Z2: array-like, real (float/double)
        Array of XYZ positions for the second set of points. *Must* be the
        same precision as the X1/Y1/Z1 arrays. Only required when
        ``autocorr==0``.

    weights2: array-like, real (float/double), optional
        Same as weights1, but for the second set of positions

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    boxsize: double
        The side-length of the cube in the cosmological simulation.
        Present to facilitate exact calculations for periodic wrapping.
        If boxsize is not supplied, then the wrapping is done based on
        the maximum difference within each dimension of the X/Y/Z arrays.

    output_ravg: boolean (default false)
        Boolean flag to output the average ``r`` for each bin. Code will
        run slower if you set this flag.

        Note: If you are calculating in single-precision, ``ravg`` will
        suffer from numerical loss of precision and can not be trusted. If
        you need accurate ``ravg`` values, then pass in double precision
        arrays for the particle positions.

    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type: string, optional
        The type of weighting to apply.  One of ["pair_product", None].
        Default: None.

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmin, rmax, ravg, npairs,
        weightavg] for each radial bin specified in the ``binfile``. If
        ``output_ravg`` is not set, then ``ravg`` will be set to 0.0 for all
        bins; similarly for ``weightavg``. ``npairs`` contains the number of
        pairs in that bin and can be used to compute the actual
        :math:`\\xi(r)` by combining with (DR, RR) counts.

    api_time: float, optional
        Only returned if ``c_api_timer`` is set.  ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    ValueError
        If a cross-correlation is requested (``autocorr`` is false) and any
        of X2/Y2/Z2 is missing.

    RuntimeError
        If the C extension returns ``None`` instead of results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.DD import DD
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 10000
    >>> boxsize = 420.0
    >>> nthreads = 4
    >>> autocorr = 1
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> weights = np.ones_like(X)
    >>> results = DD(autocorr, nthreads, binfile, X, Y, Z, weights1=weights,
    ...              weight_type='pair_product', output_ravg=True)
    >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}".
    ...                         format(r['rmin'], r['rmax'], r['ravg'],
    ...                         r['npairs'], r['weightavg'])) # doctest: +NORMALIZE_WHITESPACE
      0.167536   0.238755   0.000000          0   0.000000
      0.238755   0.340251   0.000000          0   0.000000
      0.340251   0.484892   0.000000          0   0.000000
      0.484892   0.691021   0.000000          0   0.000000
      0.691021   0.984777   0.945372          2   1.000000
      0.984777   1.403410   1.340525         10   1.000000
      1.403410   2.000000   1.732968         36   1.000000
      2.000000   2.850200   2.558878         54   1.000000
      2.850200   4.061840   3.564959        208   1.000000
      4.061840   5.788530   4.999278        674   1.000000
      5.788530   8.249250   7.126673       2154   1.000000
      8.249250  11.756000  10.201834       5996   1.000000
     11.756000  16.753600  14.517830      17746   1.000000
     16.753600  23.875500  20.716017      50252   1.000000

    """
    try:
        from Corrfunc._countpairs import countpairs as DD_extn
    except ImportError:
        msg = "Could not import the C extension for the 3-D "\
              "real-space pair counter."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian
    from future.utils import bytes_to_native_str

    # Broadcast scalar weights to arrays
    if weights1 is not None:
        weights1 = np.atleast_1d(weights1)
    if weights2 is not None:
        weights2 = np.atleast_1d(weights2)

    if not autocorr:
        if X2 is None or Y2 is None or Z2 is None:
            msg = "Must pass valid arrays for X2/Y2/Z2 for "\
                  "computing cross-correlation"
            raise ValueError(msg)

        # If only one set of points has weights, set the other to uniform
        # weights
        if weights1 is None and weights2 is not None:
            weights1 = np.ones_like(weights2)
        if weights2 is None and weights1 is not None:
            weights2 = np.ones_like(weights1)

    # Warn about non-native endian arrays
    arrays = [X1, Y1, Z1, weights1, X2, Y2, Z2, weights2]
    if not all(is_native_endian(arr) for arr in arrays):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    X1, Y1, Z1, weights1, X2, Y2, Z2, weights2 = [
        convert_to_native_endian(arr) for arr in arrays]

    # Passing None parameters breaks the parsing code, so only forward the
    # optional arguments that were actually supplied.
    optional_args = (('weights1', weights1), ('weights2', weights2),
                     ('weight_type', weight_type),
                     ('X2', X2), ('Y2', Y2), ('Z2', Z2))
    kwargs = {name: value for name, value in optional_args
              if value is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_results, api_time = DD_extn(
            autocorr, nthreads, rbinfile,
            X1, Y1, Z1,
            periodic=periodic,
            verbose=verbose,
            boxsize=boxsize,
            output_ravg=output_ravg,
            xbin_refine_factor=xbin_refine_factor,
            ybin_refine_factor=ybin_refine_factor,
            zbin_refine_factor=zbin_refine_factor,
            max_cells_per_dim=max_cells_per_dim,
            c_api_timer=c_api_timer,
            isa=integer_isa, **kwargs)
    finally:
        # Clean up the temporary bin file even when the extension fails, so
        # that error paths do not leave stray files behind.
        if delete_after_use:
            os.remove(rbinfile)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same 64-bit type.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'ravg'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'),
                               np.float64)])
    results = np.array(extn_results, dtype=results_dtype)

    if not c_api_timer:
        return results
    return results, api_time
def DDrppi_mocks(autocorr, cosmology, nthreads, pimax, binfile,
                 RA1, DEC1, CZ1, weights1=None,
                 RA2=None, DEC2=None, CZ2=None, weights2=None,
                 is_comoving_dist=False,
                 verbose=False, output_rpavg=False,
                 fast_divide=False, xbin_refine_factor=2,
                 ybin_refine_factor=2, zbin_refine_factor=1,
                 max_cells_per_dim=100,
                 c_api_timer=False, isa=r'fastest', weight_type=None):
    """
    Calculate the 2-D pair-counts corresponding to the projected correlation
    function, :math:`\\xi(r_p, \\pi)`. Pairs which are separated by less
    than the ``rp`` bins (specified in ``binfile``) in the
    X-Y plane, and less than ``pimax`` in the Z-dimension are
    counted. The input positions are expected to be on-sky co-ordinates.
    This module is suitable for calculating correlation functions for mock
    catalogs.

    If ``weights`` are provided, the resulting pair counts are weighted.  The
    weighting scheme depends on ``weight_type``.

    Returns a numpy structured array containing the pair counts for the
    specified bins.

    .. note:: This module only returns pair counts and not the actual
       correlation function :math:`\\xi(r_p, \\pi)` or :math:`wp(r_p)`. See
       the utilities :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` and
       :py:mod:`Corrfunc.utils.convert_rp_pi_counts_to_wp` for computing
       :math:`\\xi(r_p, \\pi)` and :math:`wp(r_p)` respectively from the
       pair counts.

    Parameters
    -----------

    autocorr : boolean, required
        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
        then the second set of particle positions are not required.

    cosmology : integer, required
        Integer choice for setting cosmology. Valid values are 1->LasDamas
        cosmology and 2->Planck cosmology. If you need arbitrary cosmology,
        easiest way is to convert the ``CZ`` values into co-moving distance,
        based on your preferred cosmology. Set ``is_comoving_dist=True``, to
        indicate that the co-moving distance conversion has already been done.

        Choices:
                 1. LasDamas cosmology. :math:`\\Omega_m=0.25`, :math:`\\Omega_\\Lambda=0.75`
                 2. Planck cosmology. :math:`\\Omega_m=0.302`, :math:`\\Omega_\\Lambda=0.698`

        To setup a new cosmology, add an entry to the function,
        ``init_cosmology`` in ``ROOT/utils/cosmology_params.c`` and re-install
        the entire package.

    nthreads : integer
        The number of OpenMP threads to use. Has no effect if OpenMP was not
        enabled during library compilation.

    pimax : double
        A double-precision value for the maximum separation along
        the Z-dimension.

        Distances along the :math:`\\pi` direction are binned with unit
        depth. For instance, if ``pimax=40``, then 40 bins will be created
        along the ``pi`` direction. Only pairs with ``0 <= dz < pimax``
        are counted (no equality).

    binfile: string or a list/array of floats
        For string input: filename specifying the ``rp`` bins for
        ``DDrppi_mocks``. The file should contain white-space separated values
        of (rpmin, rpmax)  for each ``rp`` wanted. The bins need to be
        contiguous and sorted in increasing order (smallest bins come first).

        For array-like input: A sequence of ``rp`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    RA1 : array-like, real (float/double)
        The array of Right Ascensions for the first set of points. RA's
        are expected to be in [0.0, 360.0], but the code will try to fix cases
        where the RA's are in [-180, 180.0]. For peace of mind, always supply
        RA's in [0.0, 360.0].

        Calculations are done in the precision of the supplied arrays.

    DEC1 : array-like, real (float/double)
        Array of Declinations for the first set of points. DEC's are expected
        to be in the [-90.0, 90.0], but the code will try to fix cases where
        the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply
        DEC's in [-90.0, 90.0].

        Must be of same precision type as RA1.

    CZ1 : array-like, real (float/double)
        Array of (Speed Of Light * Redshift) values for the first set of
        points. Code will try to detect cases where ``redshifts`` have been
        passed and multiply the entire array with the ``speed of light``.

        If is_comoving_dist is set, then ``CZ1`` is interpreted as the
        co-moving distance, rather than `cz`.

    weights1 : array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or
        (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field.  If only one of
        weights1 and weights2 is specified, the other will be set to uniform
        weights.

    RA2 : array-like, real (float/double)
        The array of Right Ascensions for the second set of points. RA's
        are expected to be in [0.0, 360.0], but the code will try to fix cases
        where the RA's are in [-180, 180.0]. For peace of mind, always supply
        RA's in [0.0, 360.0].

        Must be of same precision type as RA1/DEC1/CZ1.

    DEC2 : array-like, real (float/double)
        Array of Declinations for the second set of points. DEC's are expected
        to be in the [-90.0, 90.0], but the code will try to fix cases where
        the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply
        DEC's in [-90.0, 90.0].

        Must be of same precision type as RA1/DEC1/CZ1.

    CZ2 : array-like, real (float/double)
        Array of (Speed Of Light * Redshift) values for the second set of
        points. Code will try to detect cases where ``redshifts`` have been
        passed and multiply the entire array with the ``speed of light``.

        If is_comoving_dist is set, then ``CZ2`` is interpreted as the
        co-moving distance, rather than `cz`.

        Must be of same precision type as RA1/DEC1/CZ1.

    weights2 : array-like, real (float/double), optional
        Same as weights1, but for the second set of positions

    is_comoving_dist : boolean (default false)
        Boolean flag to indicate that ``cz`` values have already been
        converted into co-moving distances. This flag allows arbitrary
        cosmologies to be used in ``Corrfunc``.

    verbose : boolean (default false)
        Boolean flag to control output of informational messages

    output_rpavg : boolean (default false)
        Boolean flag to output the average ``rp`` for each bin. Code will
        run slower if you set this flag.

        If you are calculating in single-precision, ``rpavg`` will suffer
        from numerical loss of precision and can not be trusted. If you need
        accurate ``rpavg`` values, then pass in double precision arrays for
        the particle positions.

    fast_divide : boolean (default false)
        Boolean flag to replace the division in ``AVX`` implementation with an
        approximate reciprocal, followed by two Newton-Raphson steps. Improves
        runtime by ~15-20%. Loss of precision is at the 5-6th decimal place.

    (xyz)bin_refine_factor : integer, default is (2,2,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer : boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa : string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type : string, optional
        The type of weighting to apply.  One of ["pair_product", None].
        Default: None.

    Returns
    --------

    results : Numpy structured array
        A numpy structured array containing [rmin, rmax, rpavg, pimax,
        npairs, weightavg] for each radial bin specified in the ``binfile``.
        If ``output_rpavg`` is not set, then ``rpavg`` will be set to 0.0 for
        all bins; similarly for ``weightavg``. ``npairs`` contains the number
        of pairs in that bin and can be used to compute the actual
        :math:`\\xi(r_p, \\pi)` or :math:`wp(rp)` by combining with
        (DR, RR) counts.

    api_time : float, optional
        Only returned if ``c_api_timer`` is set.  ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    ValueError
        If a cross-correlation is requested (``autocorr`` is false) and any
        of RA2/DEC2/CZ2 is missing.

    RuntimeError
        If the C extension returns ``None`` instead of results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.mocks.DDrppi_mocks import DDrppi_mocks
    >>> import math
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../mocks/tests/", "bins")
    >>> N = 100000
    >>> boxsize = 420.0
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> Y = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> Z = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> weights = np.ones_like(X)
    >>> CZ = np.sqrt(X*X + Y*Y + Z*Z)
    >>> inv_cz = 1.0/CZ
    >>> X *= inv_cz
    >>> Y *= inv_cz
    >>> Z *= inv_cz
    >>> DEC = 90.0 - np.arccos(Z)*180.0/math.pi
    >>> RA = (np.arctan2(Y, X)*180.0/math.pi) + 180.0
    >>> autocorr = 1
    >>> cosmology = 1
    >>> nthreads = 2
    >>> pimax = 40.0
    >>> results = DDrppi_mocks(autocorr, cosmology, nthreads,
    ...                        pimax, binfile, RA, DEC, CZ,
    ...                        weights1=weights, weight_type='pair_product',
    ...                        output_rpavg=True, is_comoving_dist=True)
    >>> for r in results[519:]: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.1f}"
    ...                               " {4:10d} {5:10.6f}".format(r['rmin'], r['rmax'],
    ...                               r['rpavg'], r['pimax'], r['npairs'], r['weightavg']))
    ...                         # doctest: +NORMALIZE_WHITESPACE
     11.359969  16.852277  14.285169       40.0     104850   1.000000
     16.852277  25.000000  21.181246        1.0     274144   1.000000
     16.852277  25.000000  21.190844        2.0     272876   1.000000
     16.852277  25.000000  21.183321        3.0     272294   1.000000
     16.852277  25.000000  21.188486        4.0     272506   1.000000
     16.852277  25.000000  21.170832        5.0     272100   1.000000
     16.852277  25.000000  21.165379        6.0     271788   1.000000
     16.852277  25.000000  21.175246        7.0     270040   1.000000
     16.852277  25.000000  21.187417        8.0     269492   1.000000
     16.852277  25.000000  21.172066        9.0     269682   1.000000
     16.852277  25.000000  21.182460       10.0     268266   1.000000
     16.852277  25.000000  21.170594       11.0     268744   1.000000
     16.852277  25.000000  21.178608       12.0     266820   1.000000
     16.852277  25.000000  21.187184       13.0     266510   1.000000
     16.852277  25.000000  21.184937       14.0     265484   1.000000
     16.852277  25.000000  21.180184       15.0     265258   1.000000
     16.852277  25.000000  21.191504       16.0     262952   1.000000
     16.852277  25.000000  21.187746       17.0     262602   1.000000
     16.852277  25.000000  21.189778       18.0     260206   1.000000
     16.852277  25.000000  21.188882       19.0     259410   1.000000
     16.852277  25.000000  21.185684       20.0     256806   1.000000
     16.852277  25.000000  21.194036       21.0     255574   1.000000
     16.852277  25.000000  21.184115       22.0     255406   1.000000
     16.852277  25.000000  21.178255       23.0     252394   1.000000
     16.852277  25.000000  21.184644       24.0     252220   1.000000
     16.852277  25.000000  21.187020       25.0     251668   1.000000
     16.852277  25.000000  21.183827       26.0     249648   1.000000
     16.852277  25.000000  21.183121       27.0     247160   1.000000
     16.852277  25.000000  21.180872       28.0     246238   1.000000
     16.852277  25.000000  21.185251       29.0     246030   1.000000
     16.852277  25.000000  21.183488       30.0     242124   1.000000
     16.852277  25.000000  21.194538       31.0     242426   1.000000
     16.852277  25.000000  21.190702       32.0     239778   1.000000
     16.852277  25.000000  21.188985       33.0     239046   1.000000
     16.852277  25.000000  21.187092       34.0     237640   1.000000
     16.852277  25.000000  21.185515       35.0     236256   1.000000
     16.852277  25.000000  21.190278       36.0     233536   1.000000
     16.852277  25.000000  21.183240       37.0     233274   1.000000
     16.852277  25.000000  21.183796       38.0     231628   1.000000
     16.852277  25.000000  21.200668       39.0     230378   1.000000
     16.852277  25.000000  21.181153       40.0     229006   1.000000

    """
    try:
        from Corrfunc._countpairs_mocks import countpairs_rp_pi_mocks as\
            DDrppi_extn
    except ImportError:
        # The trailing space matters: without it the two literals join into
        # "on-skypair counter.".
        msg = "Could not import the C extension for the on-sky "\
              "pair counter."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian
    from future.utils import bytes_to_native_str

    # Broadcast scalar weights to arrays
    if weights1 is not None:
        weights1 = np.atleast_1d(weights1)
    if weights2 is not None:
        weights2 = np.atleast_1d(weights2)

    if not autocorr:
        if RA2 is None or DEC2 is None or CZ2 is None:
            msg = "Must pass valid arrays for RA2/DEC2/CZ2 for "\
                  "computing cross-correlation"
            raise ValueError(msg)

        # If only one set of points has weights, set the other to uniform
        # weights
        if weights1 is None and weights2 is not None:
            weights1 = np.ones_like(weights2)
        if weights2 is None and weights1 is not None:
            weights2 = np.ones_like(weights1)
    else:
        # Auto-correlation: the second catalog is replaced by one-element
        # placeholder arrays, which are still forwarded to the extension.
        RA2 = np.empty(1)
        DEC2 = np.empty(1)
        CZ2 = np.empty(1)

    # Warn about non-native endian arrays
    arrays = [RA1, DEC1, CZ1, weights1, RA2, DEC2, CZ2, weights2]
    if not all(is_native_endian(arr) for arr in arrays):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    RA1, DEC1, CZ1, weights1, RA2, DEC2, CZ2, weights2 = [
        convert_to_native_endian(arr) for arr in arrays]

    # NOTE(review): the return value of fix_ra_dec is discarded, so this
    # presumably adjusts the arrays in place -- confirm against the helper.
    fix_ra_dec(RA1, DEC1)
    if autocorr == 0:
        fix_ra_dec(RA2, DEC2)

    # Passing None parameters breaks the parsing code, so only forward the
    # optional arguments that were actually supplied.
    optional_args = (('weights1', weights1), ('weights2', weights2),
                     ('weight_type', weight_type),
                     ('RA2', RA2), ('DEC2', DEC2), ('CZ2', CZ2))
    kwargs = {name: value for name, value in optional_args
              if value is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_results, api_time = DDrppi_extn(
            autocorr, cosmology, nthreads,
            pimax, rbinfile,
            RA1, DEC1, CZ1,
            is_comoving_dist=is_comoving_dist,
            verbose=verbose,
            output_rpavg=output_rpavg,
            fast_divide=fast_divide,
            xbin_refine_factor=xbin_refine_factor,
            ybin_refine_factor=ybin_refine_factor,
            zbin_refine_factor=zbin_refine_factor,
            max_cells_per_dim=max_cells_per_dim,
            c_api_timer=c_api_timer,
            isa=integer_isa, **kwargs)
    finally:
        # Clean up the temporary bin file even when the extension fails, so
        # that error paths do not leave stray files behind.
        if delete_after_use:
            os.remove(rbinfile)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same 64-bit type.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'rpavg'), np.float64),
                              (bytes_to_native_str(b'pimax'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'),
                               np.float64)])
    results = np.array(extn_results, dtype=results_dtype)

    if not c_api_timer:
        return results
    return results, api_time
def wp(boxsize, pimax, nthreads, binfile, X, Y, Z,
       weights=None, weight_type=None, verbose=False, output_rpavg=False,
       xbin_refine_factor=2, ybin_refine_factor=2,
       zbin_refine_factor=1, max_cells_per_dim=100,
       c_api_timer=False, c_cell_timer=False, isa='fastest'):
    """
    Function to compute the projected correlation function in a
    periodic cosmological box. Pairs which are separated by less
    than the ``rp`` bins (specified in ``binfile``) in the
    X-Y plane, and less than ``pimax`` in the Z-dimension are
    counted.

    If ``weights`` are provided, the resulting correlation function
    is weighted.  The weighting scheme depends on ``weight_type``.

    .. note:: Pairs are double-counted. And if ``rpmin`` is set to
       0.0, then all the self-pairs (i'th particle with itself) are
       added to the first bin => minimum number of pairs in the first bin
       is the total number of particles.

    Parameters
    -----------

    boxsize: double
        A double-precision value for the boxsize of the simulation
        in same units as the particle positions and the ``rp`` bins.

    pimax: double
        A double-precision value for the maximum separation along
        the Z-dimension.

        .. note:: Only pairs with ``0 <= dz < pimax`` are counted (no
           equality).

    nthreads: integer
        Number of threads to use.

    binfile: string or a list/array of floats
        For string input: filename specifying the ``rp`` bins for
        ``wp``. The file should contain white-space separated values
        of (rpmin, rpmax)  for each ``rp`` wanted. The bins do not need to be
        contiguous but must be in increasing order (smallest bins come first).

        For array-like input: A sequence of ``rp`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input, specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    X/Y/Z: arraytype, real (float/double)
        Particle positions in the 3 axes. Must be within [0, boxsize]
        and specified in the same units as ``rp_bins`` and boxsize. All
        3 arrays must be of the same floating-point type.

        Calculations will be done in the same precision as these arrays,
        i.e., calculations will be in floating point if XYZ are single
        precision arrays (C float type); or in double-precision if XYZ
        are double precision arrays (C double type).

    weights: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or
        (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field.

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    output_rpavg: boolean (default false)
        Boolean flag to output the average ``rp`` for each bin. Code will
        run slower if you set this flag.

        .. note:: If you are calculating in single-precision, ``rpavg`` will
           suffer from numerical loss of precision and can not be trusted. If
           you need accurate ``rpavg`` values, then pass in double precision
           arrays for the particle positions.

    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    c_cell_timer : boolean (default false)
        Boolean flag to measure actual time spent **per cell-pair** within the
        C libraries. A very detailed timer that stores information about the
        number of particles in each cell, the thread id that processed that
        cell-pair and the amount of time in nano-seconds taken to process that
        cell pair. This timer can be used to study the instruction set
        efficiency, and load-balancing of the code.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type: string, optional
        The type of weighting to apply.  One of ["pair_product", None].
        Default: None.

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmin, rmax, rpavg, wp, npairs,
        weightavg] for each radial bin specified in the ``binfile``. If
        ``output_rpavg`` is not set then ``rpavg`` will be set to 0.0 for all
        bins; similarly for ``weightavg``. ``wp`` contains the projected
        correlation function while ``npairs`` contains the number of unique
        pairs in that bin.  If using weights, ``wp`` will be weighted while
        ``npairs`` will not be.

    api_time: float, optional
        Only returned if ``c_api_timer`` is set.  ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    cell_time: list, optional
        Only returned if ``c_cell_timer`` is set. Contains detailed stats
        about each cell-pair visited during pair-counting, viz., number of
        particles in each of the cells in the pair, 1-D cell-indices for each
        cell in the pair, time (in nano-seconds) to process the pair and
        the thread-id for the thread that processed that cell-pair.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    RuntimeError
        If the C extension returns ``None`` instead of results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.wp import wp
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 10000
    >>> boxsize = 420.0
    >>> pimax = 40.0
    >>> nthreads = 4
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> results = wp(boxsize, pimax, nthreads, binfile, X, Y, Z,
    ...              weights=np.ones_like(X), weight_type='pair_product')
    >>> for r in results:
    ...     print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.6f} {4:10d} {5:10.6f}".
    ...           format(r['rmin'], r['rmax'],
    ...           r['rpavg'], r['wp'], r['npairs'], r['weightavg']))
    ...           # doctest: +NORMALIZE_WHITESPACE
      0.167536   0.238755   0.000000  66.717143         18   1.000000
      0.238755   0.340251   0.000000 -15.786045         16   1.000000
      0.340251   0.484892   0.000000   2.998470         42   1.000000
      0.484892   0.691021   0.000000 -15.779885         66   1.000000
      0.691021   0.984777   0.000000 -11.966728        142   1.000000
      0.984777   1.403410   0.000000  -9.699906        298   1.000000
      1.403410   2.000000   0.000000 -11.698771        588   1.000000
      2.000000   2.850200   0.000000   3.848375       1466   1.000000
      2.850200   4.061840   0.000000  -0.921452       2808   1.000000
      4.061840   5.788530   0.000000   0.454851       5802   1.000000
      5.788530   8.249250   0.000000   1.428344      11926   1.000000
      8.249250  11.756000   0.000000  -1.067885      23478   1.000000
     11.756000  16.753600   0.000000  -0.553319      47994   1.000000
     16.753600  23.875500   0.000000  -0.086433      98042   1.000000

    """
    try:
        from Corrfunc._countpairs import countpairs_wp as wp_extn
    except ImportError:
        msg = "Could not import the C extension for the projected "\
              "correlation function."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian

    # Broadcast scalar weights to arrays
    if weights is not None:
        weights = np.atleast_1d(weights)

    # Warn about non-native endian arrays
    arrays = [X, Y, Z, weights]
    if not all(is_native_endian(arr) for arr in arrays):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    # The iterable must be an explicit list: a bare ``for arr in X, Y, Z,
    # weights`` inside a comprehension is a SyntaxError on Python 3.
    X, Y, Z, weights = [convert_to_native_endian(arr) for arr in arrays]

    # Passing None parameters breaks the parsing code, so only forward the
    # optional arguments that were actually supplied.
    optional_args = (('weights', weights), ('weight_type', weight_type))
    kwargs = {name: value for name, value in optional_args
              if value is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_results, api_time, cell_time = wp_extn(
            boxsize, pimax, nthreads,
            rbinfile,
            X, Y, Z,
            verbose=verbose,
            output_rpavg=output_rpavg,
            xbin_refine_factor=xbin_refine_factor,
            ybin_refine_factor=ybin_refine_factor,
            zbin_refine_factor=zbin_refine_factor,
            max_cells_per_dim=max_cells_per_dim,
            c_api_timer=c_api_timer,
            c_cell_timer=c_cell_timer,
            isa=integer_isa, **kwargs)
    finally:
        # Clean up the temporary bin file even when the extension fails, so
        # that error paths do not leave stray files behind.
        if delete_after_use:
            os.remove(rbinfile)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same 64-bit type.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'rpavg'), np.float64),
                              (bytes_to_native_str(b'wp'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'),
                               np.float64)])
    results = np.array(extn_results, dtype=results_dtype)

    # A better solution for returning multiple values based on
    # input parameter. Lifted straight from numpy.unique -- MS 10/26/2016
    if not (c_api_timer or c_cell_timer):
        return results

    ret = (results, )
    if c_api_timer:
        ret += (api_time, )

    if c_cell_timer:
        # Convert to numpy structured array
        np_cell_time = _convert_cell_timer(cell_time)
        ret += (np_cell_time, )

    return ret
def xi(boxsize, nthreads, binfile, X, Y, Z,
       weights=None, weight_type=None, verbose=False, output_ravg=False,
       xbin_refine_factor=2, ybin_refine_factor=2,
       zbin_refine_factor=1, max_cells_per_dim=100,
       c_api_timer=False, isa=r'fastest'):
    """
    Function to compute the 3-D real-space correlation function, ``xi(r)``,
    in a periodic cosmological box. Pairs which are separated by less than
    the ``r`` bins (specified in ``binfile``) in 3-D real space are counted.

    If ``weights`` are provided, the resulting correlation function
    is weighted.  The weighting scheme depends on ``weight_type``.


    .. note:: Pairs are double-counted. And if ``rmin`` is set to
        0.0, then all the self-pairs (i'th particle with itself) are
        added to the first bin => minimum number of pairs in the first bin
        is the total number of particles.


    Parameters
    -----------

    boxsize: double
        A double-precision value for the boxsize of the simulation
        in same units as the particle positions and the ``r`` bins.

    nthreads: integer
        Number of threads to use.

    binfile: string or an list/array of floats
        For string input: filename specifying the ``r`` bins for
        ``xi``. The file should contain white-space separated values
        of (rmin, rmax)  for each ``r`` wanted. The bins need to be
        contiguous and sorted in increasing order (smallest bins come first).

        For array-like input: A sequence of ``r`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    X/Y/Z: arraytype, real (float/double)
        Particle positions in the 3 axes. Must be within [0, boxsize]
        and specified in the same units as the ``r`` bins and boxsize. All
        3 arrays must be of the same floating-point type.

        Calculations will be done in the same precision as these arrays,
        i.e., calculations will be in floating point if XYZ are single
        precision arrays (C float type); or in double-precision if XYZ
        are double precision arrays (C double type).

    weights: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or
        (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field.

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    output_ravg: boolean (default false)
        Boolean flag to output the average ``r`` for each bin. Code will
        run slower if you set this flag.

        Note: If you are calculating in single-precision, ``ravg`` will
        suffer from numerical loss of precision and can not be trusted. If
        you need accurate ``ravg`` values, then pass in double precision
        arrays for the particle positions.

    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type: string, optional, Default: None.
        The type of weighting to apply. One of ["pair_product", None].

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmin, rmax, ravg, xi, npairs,
        weightavg] for each radial bin specified in the ``binfile``. If
        ``output_ravg`` is not set then ``ravg`` will be set to 0.0 for all
        bins; similarly for ``weightavg``. ``xi`` contains the correlation
        function while ``npairs`` contains the number of pairs in that bin.
        If using weights, ``xi`` will be weighted while ``npairs`` will not
        be.

    api_time: float, optional
        Only returned if ``c_api_timer`` is set.  ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    RuntimeError
        If the C extension does not return any results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.xi import xi
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 100000
    >>> boxsize = 420.0
    >>> nthreads = 4
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> weights = np.ones_like(X)
    >>> results = xi(boxsize, nthreads, binfile, X, Y, Z, weights=weights, weight_type='pair_product', output_ravg=True)
    >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.6f} {4:10d} {5:10.6f}"
    ...                         .format(r['rmin'], r['rmax'],
    ...                         r['ravg'], r['xi'], r['npairs'], r['weightavg']))
    ...                   # doctest: +NORMALIZE_WHITESPACE
      0.167536   0.238755   0.226592  -0.205733          4   1.000000
      0.238755   0.340251   0.289277  -0.176729         12   1.000000
      0.340251   0.484892   0.426819  -0.051829         40   1.000000
      0.484892   0.691021   0.596187  -0.131853        106   1.000000
      0.691021   0.984777   0.850100  -0.049207        336   1.000000
      0.984777   1.403410   1.225112   0.028543       1052   1.000000
      1.403410   2.000000   1.737153   0.011403       2994   1.000000
      2.000000   2.850200   2.474588   0.005405       8614   1.000000
      2.850200   4.061840   3.532018  -0.014098      24448   1.000000
      4.061840   5.788530   5.022241  -0.010784      70996   1.000000
      5.788530   8.249250   7.160648  -0.001588     207392   1.000000
      8.249250  11.756000  10.207213  -0.000323     601002   1.000000
     11.756000  16.753600  14.541171   0.000007    1740084   1.000000
     16.753600  23.875500  20.728773  -0.001595    5028058   1.000000

    """

    try:
        from Corrfunc._countpairs import countpairs_xi as xi_extn
    except ImportError:
        msg = "Could not import the C extension for the 3-D "\
              "real-space correlation function."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian

    # Broadcast scalar weights to arrays
    if weights is not None:
        weights = np.atleast_1d(weights)

    # Warn about non-native endian arrays
    if not all(is_native_endian(arr) for arr in [X, Y, Z, weights]):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    X, Y, Z, weights = [convert_to_native_endian(arr)
                        for arr in [X, Y, Z, weights]]

    # Passing None parameters breaks the parsing code, so only forward
    # the optional arguments that were actually supplied.
    optional_args = (('weights', weights), ('weight_type', weight_type))
    kwargs = {key: val for key, val in optional_args if val is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_return = xi_extn(boxsize, nthreads, rbinfile,
                              X, Y, Z,
                              verbose=verbose,
                              output_ravg=output_ravg,
                              xbin_refine_factor=xbin_refine_factor,
                              ybin_refine_factor=ybin_refine_factor,
                              zbin_refine_factor=zbin_refine_factor,
                              max_cells_per_dim=max_cells_per_dim,
                              c_api_timer=c_api_timer,
                              isa=integer_isa, **kwargs)
    finally:
        # Remove the temporary bin file even when the extension fails, so
        # that an error does not leave the file behind.
        if delete_after_use:
            os.remove(rbinfile)

    # Check for a bare ``None`` *before* unpacking; unpacking ``None``
    # directly would raise a TypeError instead of the RuntimeError below.
    if extn_return is None:
        extn_results, api_time = None, None
    else:
        extn_results, api_time = extn_return

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the identical dtype.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'ravg'), np.float64),
                              (bytes_to_native_str(b'xi'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'), np.float64)])
    results = np.array(extn_results, dtype=results_dtype)

    if not c_api_timer:
        return results
    else:
        return results, api_time
def DDrppi(autocorr, nthreads, pimax, binfile, X1, Y1, Z1, weights1=None,
           periodic=True, X2=None, Y2=None, Z2=None, weights2=None,
           verbose=False, boxsize=0.0, output_rpavg=False,
           xbin_refine_factor=2, ybin_refine_factor=2,
           zbin_refine_factor=1, max_cells_per_dim=100,
           c_api_timer=False, isa=r'fastest', weight_type=None):
    """
    Calculate the 3-D pair-counts corresponding to the real-space correlation
    function, :math:`\\xi(r_p, \\pi)` or :math:`w_p(r_p)`. Pairs which are
    separated by less than the ``rp`` bins (specified in ``binfile``) in the
    X-Y plane, and less than ``pimax`` in the Z-dimension are
    counted.

    If ``weights`` are provided, the resulting pair counts are weighted.  The
    weighting scheme depends on ``weight_type``.


    .. note:: This module only returns pair counts and not the actual
       correlation function :math:`\\xi(r_p, \\pi)` or :math:`w_p(r_p)`. See
       the utilities :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` and
       :py:mod:`Corrfunc.utils.convert_rp_pi_counts_to_wp` for computing
       :math:`\\xi(r_p, \\pi)` and :math:`w_p(r_p)` respectively from the
       pair counts.


    Parameters
    -----------

    autocorr: boolean, required
        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
        then the second set of particle positions are not required.

    nthreads: integer
        The number of OpenMP threads to use. Has no effect if OpenMP was not
        enabled during library compilation.

    pimax: double
        A double-precision value for the maximum separation along
        the Z-dimension.

        Distances along the :math:`\\pi` direction are binned with unit
        depth. For instance, if ``pimax=40``, then 40 bins will be created
        along the ``pi`` direction.

        Note: Only pairs with ``0 <= dz < pimax`` are counted (no equality).

    binfile: string or an list/array of floats
        For string input: filename specifying the ``rp`` bins for
        ``DDrppi``. The file should contain white-space separated values
        of (rpmin, rpmax)  for each ``rp`` wanted. The bins need to be
        contiguous and sorted in increasing order (smallest bins come first).

        For array-like input: A sequence of ``rp`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    X1/Y1/Z1: array-like, real (float/double)
        The array of X/Y/Z positions for the first set of points.
        Calculations are done in the precision of the supplied arrays.

    weights1: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or
        (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field.  If only one of
        weights1 and weights2 is specified, the other will be set to uniform
        weights.

    X2/Y2/Z2: array-like, real (float/double)
        Array of XYZ positions for the second set of points. *Must* be the same
        precision as the X1/Y1/Z1 arrays. Only required when ``autocorr==0``.

    weights2: array-like, real (float/double), optional
        Same as weights1, but for the second set of positions

    periodic: boolean
        Boolean flag to indicate periodic boundary conditions.

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    boxsize: double
        The side-length of the cube in the cosmological simulation.
        Present to facilitate exact calculations for periodic wrapping.
        If boxsize is not supplied, then the wrapping is done based on
        the maximum difference within each dimension of the X/Y/Z arrays.

    output_rpavg: boolean (default false)
        Boolean flag to output the average ``rp`` for each bin. Code will
        run slower if you set this flag.

        Note: If you are calculating in single-precision, ``rpavg`` will
        suffer from numerical loss of precision and can not be trusted. If
        you need accurate ``rpavg`` values, then pass in double precision
        arrays for the particle positions.

    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rpmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type: string, optional
        The type of weighting to apply. One of ["pair_product", None].
        Default: None.

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmin, rmax, rpavg, pimax,
        npairs, weightavg] for each radial bin specified in the ``binfile``.
        If ``output_rpavg`` is not set, then ``rpavg`` will be set to 0.0 for
        all bins; similarly for ``weightavg``. ``npairs`` contains the number
        of pairs in that bin and can be used to compute
        :math:`\\xi(r_p, \\pi)` by combining with (DR, RR) counts.

    api_time: float, optional
        Only returned if ``c_api_timer`` is set.  ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    ValueError
        If ``autocorr`` is false and any of X2/Y2/Z2 is missing.

    RuntimeError
        If the C extension does not return any results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.DDrppi import DDrppi
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 10000
    >>> boxsize = 420.0
    >>> nthreads = 4
    >>> autocorr = 1
    >>> pimax = 40.0
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> weights = np.ones_like(X)
    >>> results = DDrppi(autocorr, nthreads, pimax, binfile,
    ...                  X, Y, Z, weights1=weights, weight_type='pair_product', output_rpavg=True)
    >>> for r in results[519:]: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10.1f}"
    ...                               " {4:10d} {5:10.6f}".format(r['rmin'], r['rmax'],
    ...                               r['rpavg'], r['pimax'], r['npairs'], r['weightavg']))
    ...                         # doctest: +NORMALIZE_WHITESPACE
     11.756000  16.753600  14.379250       40.0       1150   1.000000
     16.753600  23.875500  20.449131        1.0       2604   1.000000
     16.753600  23.875500  20.604834        2.0       2370   1.000000
     16.753600  23.875500  20.523989        3.0       2428   1.000000
     16.753600  23.875500  20.475181        4.0       2462   1.000000
     16.753600  23.875500  20.458005        5.0       2532   1.000000
     16.753600  23.875500  20.537162        6.0       2522   1.000000
     16.753600  23.875500  20.443087        7.0       2422   1.000000
     16.753600  23.875500  20.474580        8.0       2360   1.000000
     16.753600  23.875500  20.420360        9.0       2512   1.000000
     16.753600  23.875500  20.478355       10.0       2472   1.000000
     16.753600  23.875500  20.485268       11.0       2406   1.000000
     16.753600  23.875500  20.372985       12.0       2420   1.000000
     16.753600  23.875500  20.647998       13.0       2378   1.000000
     16.753600  23.875500  20.556208       14.0       2420   1.000000
     16.753600  23.875500  20.527992       15.0       2462   1.000000
     16.753600  23.875500  20.581017       16.0       2380   1.000000
     16.753600  23.875500  20.491819       17.0       2346   1.000000
     16.753600  23.875500  20.534440       18.0       2496   1.000000
     16.753600  23.875500  20.529129       19.0       2512   1.000000
     16.753600  23.875500  20.501946       20.0       2500   1.000000
     16.753600  23.875500  20.513349       21.0       2544   1.000000
     16.753600  23.875500  20.471915       22.0       2430   1.000000
     16.753600  23.875500  20.450651       23.0       2354   1.000000
     16.753600  23.875500  20.550753       24.0       2460   1.000000
     16.753600  23.875500  20.540262       25.0       2490   1.000000
     16.753600  23.875500  20.559572       26.0       2350   1.000000
     16.753600  23.875500  20.534245       27.0       2382   1.000000
     16.753600  23.875500  20.511302       28.0       2508   1.000000
     16.753600  23.875500  20.491632       29.0       2456   1.000000
     16.753600  23.875500  20.592493       30.0       2386   1.000000
     16.753600  23.875500  20.506234       31.0       2484   1.000000
     16.753600  23.875500  20.482109       32.0       2538   1.000000
     16.753600  23.875500  20.518463       33.0       2544   1.000000
     16.753600  23.875500  20.482515       34.0       2534   1.000000
     16.753600  23.875500  20.503124       35.0       2382   1.000000
     16.753600  23.875500  20.471307       36.0       2356   1.000000
     16.753600  23.875500  20.384231       37.0       2554   1.000000
     16.753600  23.875500  20.454012       38.0       2458   1.000000
     16.753600  23.875500  20.585543       39.0       2394   1.000000
     16.753600  23.875500  20.504965       40.0       2500   1.000000

    """
    try:
        from Corrfunc._countpairs import countpairs_rp_pi as DDrppi_extn
    except ImportError:
        msg = "Could not import the C extension for the 3-D "\
              "real-space pair counter."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian
    from future.utils import bytes_to_native_str

    # Broadcast scalar weights to arrays
    if weights1 is not None:
        weights1 = np.atleast_1d(weights1)
    if weights2 is not None:
        weights2 = np.atleast_1d(weights2)

    if not autocorr:
        if X2 is None or Y2 is None or Z2 is None:
            msg = "Must pass valid arrays for X2/Y2/Z2 for "\
                  "computing cross-correlation"
            raise ValueError(msg)

        # If only one set of points has weights, set the other to uniform
        # weights.
        # NOTE(review): ``ones_like`` copies the shape of the *other*
        # catalogue's weights; presumably this is only right when both
        # catalogues hold the same number of points -- confirm against
        # what the C extension expects.
        if weights1 is None and weights2 is not None:
            weights1 = np.ones_like(weights2)
        if weights2 is None and weights1 is not None:
            weights2 = np.ones_like(weights1)
    else:
        # Auto-correlation: the second data-set is not used, but
        # placeholder arrays are still handed to the extension.
        X2 = np.empty(1)
        Y2 = np.empty(1)
        Z2 = np.empty(1)

    # Warn about non-native endian arrays
    all_arrays = [X1, Y1, Z1, weights1, X2, Y2, Z2, weights2]
    if not all(is_native_endian(arr) for arr in all_arrays):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    X1, Y1, Z1, weights1, X2, Y2, Z2, weights2 = [
        convert_to_native_endian(arr) for arr in all_arrays]

    # Passing None parameters breaks the parsing code, so only forward
    # the optional arguments that were actually supplied.
    optional_args = (('weights1', weights1), ('weights2', weights2),
                     ('weight_type', weight_type),
                     ('X2', X2), ('Y2', Y2), ('Z2', Z2))
    kwargs = {key: val for key, val in optional_args if val is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_return = DDrppi_extn(autocorr, nthreads,
                                  pimax, rbinfile,
                                  X1, Y1, Z1,
                                  periodic=periodic,
                                  verbose=verbose,
                                  boxsize=boxsize,
                                  output_rpavg=output_rpavg,
                                  xbin_refine_factor=xbin_refine_factor,
                                  ybin_refine_factor=ybin_refine_factor,
                                  zbin_refine_factor=zbin_refine_factor,
                                  max_cells_per_dim=max_cells_per_dim,
                                  c_api_timer=c_api_timer,
                                  isa=integer_isa, **kwargs)
    finally:
        # Remove the temporary bin file even when the extension fails, so
        # that an error does not leave the file behind.
        if delete_after_use:
            os.remove(rbinfile)

    # Check for a bare ``None`` *before* unpacking; unpacking ``None``
    # directly would raise a TypeError instead of the RuntimeError below.
    if extn_return is None:
        extn_results, api_time = None, None
    else:
        extn_results, api_time = extn_return

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the identical dtype.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'rpavg'), np.float64),
                              (bytes_to_native_str(b'pimax'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'), np.float64),
                              ])
    results = np.array(extn_results, dtype=results_dtype)

    if not c_api_timer:
        return results
    else:
        return results, api_time
def vpf(rmax, nbins, nspheres, numpN, seed,
        X, Y, Z,
        verbose=False, periodic=True, boxsize=0.0,
        xbin_refine_factor=1, ybin_refine_factor=1,
        zbin_refine_factor=1, max_cells_per_dim=100,
        c_api_timer=False, isa=r'fastest'):
    """
    Function to compute the counts-in-cells on 3-D real-space points.

    Returns a numpy structured array containing the probability of a
    sphere of radius up to ``rmax`` containing [0, numpN-1] galaxies.

    Parameters
    -----------

    rmax: double
        Maximum radius of the sphere to place on the particles

    nbins: integer
        Number of bins in the counts-in-cells. Radius of first shell
        is rmax/nbins

    nspheres: integer (>= 0)
        Number of random spheres to place within the particle distribution.
        For a small number of spheres, the error is larger in the measured
        pN's.

    numpN: integer (>= 1)
        Governs how many unique pN's are to returned. If ``numpN`` is set to 1,
        then only the vpf (p0) is returned. For ``numpN=2``, p0 and p1 are
        returned.

        More explicitly, the columns in the results look like the following:

        ======   ==========================
        numpN    Columns in output
        ======   ==========================
           1      p0
           2      p0      p1
           3      p0      p1     p2
           4      p0      p1     p2     p3
        ======   ==========================

        and so on...

        Note: ``p0`` is the vpf

    seed: unsigned integer
        Random number seed for the underlying GSL random number generator. Used
        to draw centers of the spheres.

    X/Y/Z: arraytype, real (float/double)
        Particle positions in the 3 axes. Must be within [0, boxsize]
        and specified in the same units as ``rmax`` and boxsize. All
        3 arrays must be of the same floating-point type.

        Calculations will be done in the same precision as these arrays,
        i.e., calculations will be in floating point if XYZ are single
        precision arrays (C float type); or in double-precision if XYZ
        are double precision arrays (C double type).

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    periodic: boolean
        Boolean flag to indicate periodic boundary conditions.

    boxsize: double
        The side-length of the cube in the cosmological simulation.
        Present to facilitate exact calculations for periodic wrapping.
        If boxsize is not supplied, then the wrapping is done based on
        the maximum difference within each dimension of the X/Y/Z arrays.

    (xyz)bin_refine_factor: integer, default is (1,1,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

        Note: Since the counts in spheres calculation is symmetric
        in all 3 dimensions, the defaults are different from the clustering
        routines.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmax, pN[numpN]] with ``nbins``
        elements. Each row contains the maximum radius of the sphere and the
        ``numpN`` elements in the ``pN`` array. Each element of this array
        contains the probability that a sphere of radius ``rmax`` contains
        *exactly* ``N`` galaxies. For example, pN[0] (p0, the void probability
        function) is the probability that a sphere of radius ``rmax`` contains
        0 galaxies.

        if ``c_api_timer`` is set, then the return value is a tuple containing
        (results, api_time). ``api_time`` measures only the time spent within
        the C library and ignores all python overhead.

    Raises
    ------

    ImportError
        If the compiled C extension can not be imported.

    ValueError
        If ``numpN`` is less than 1, or if ``nspheres`` spheres of radius
        ``rmax`` occupy more volume than the particle distribution.

    RuntimeError
        If the C extension does not return any results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from Corrfunc.theory.vpf import vpf
    >>> rmax = 10.0
    >>> nbins = 10
    >>> nspheres = 10000
    >>> numpN = 5
    >>> seed = -1
    >>> N = 100000
    >>> boxsize = 420.0
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> results = vpf(rmax, nbins, nspheres, numpN, seed, X, Y, Z)
    >>> for r in results:
    ...     print("{0:10.1f} ".format(r[0]), end="")
    ...     # doctest: +NORMALIZE_WHITESPACE
    ...     for pn in r[1]:
    ...         print("{0:10.3f} ".format(pn), end="")
    ...         # doctest: +NORMALIZE_WHITESPACE
    ...     print("") # doctest: +NORMALIZE_WHITESPACE
    1.0      0.995      0.005      0.000      0.000      0.000
    2.0      0.956      0.044      0.001      0.000      0.000
    3.0      0.858      0.130      0.012      0.001      0.000
    4.0      0.695      0.252      0.047      0.005      0.001
    5.0      0.493      0.347      0.127      0.028      0.005
    6.0      0.295      0.362      0.219      0.091      0.026
    7.0      0.141      0.285      0.265      0.179      0.085
    8.0      0.056      0.159      0.228      0.229      0.161
    9.0      0.019      0.066      0.135      0.192      0.192
    10.0     0.003      0.019      0.054      0.106      0.150

    """

    try:
        from Corrfunc._countpairs import countspheres_vpf as vpf_extn
    except ImportError:
        msg = "Could not import the C extension for the Counts-in-Cells "\
              " (vpf)"
        raise ImportError(msg)

    import numpy as np
    from warnings import warn
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        convert_to_native_endian, is_native_endian, sys_pipes
    from math import pi

    if numpN <= 0:
        msg = "Number of counts-in-cells wanted must be at least 1"
        raise ValueError(msg)

    # Volume available to the spheres: the full box when ``boxsize`` is
    # given, otherwise the bounding box of the particles.
    if boxsize > 0.0:
        volume = boxsize * boxsize * boxsize
    else:
        volume = (max(X) - min(X)) * \
                 (max(Y) - min(Y)) * \
                 (max(Z) - min(Z))

    volume_sphere = 4. / 3. * pi * rmax * rmax * rmax
    if nspheres * volume_sphere > volume:
        msg = "There are not as many independent volumes in the "\
              "requested particle distribution. Num. spheres = {0} "\
              "rmax = {1} => effective volume = {2}.\nVolume of particles ="\
              "{3}. Reduce rmax or Nspheres"\
              .format(nspheres, rmax, nspheres * volume_sphere, volume)
        raise ValueError(msg)

    # Warn about non-native endian arrays
    if not all(is_native_endian(arr) for arr in [X, Y, Z]):
        warn('One or more input array has non-native endianness! '
             'A copy will be made with the correct endianness.')
    X, Y, Z = [convert_to_native_endian(arr) for arr in [X, Y, Z]]

    integer_isa = translate_isa_string_to_enum(isa)
    with sys_pipes():
        extn_results = vpf_extn(rmax, nbins, nspheres, numpN, seed,
                                X, Y, Z,
                                verbose=verbose, periodic=periodic,
                                boxsize=boxsize,
                                xbin_refine_factor=xbin_refine_factor,
                                ybin_refine_factor=ybin_refine_factor,
                                zbin_refine_factor=zbin_refine_factor,
                                max_cells_per_dim=max_cells_per_dim,
                                c_api_timer=c_api_timer,
                                isa=integer_isa)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)
    else:
        extn_results, api_time = extn_results

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the identical dtype.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'pN'),
                               (np.float64, numpN))])
    nbin = len(extn_results)
    results = np.zeros(nbin, dtype=results_dtype)

    for ii, r in enumerate(extn_results):
        results['rmax'][ii] = r[0]
        if numpN == 1:
            # Assign only this row. Assigning to ``results['pN']`` as a
            # whole would overwrite every row on each iteration and leave
            # all bins holding the last bin's p0.
            results['pN'][ii] = r[1]
        else:
            for j in range(numpN):
                results['pN'][ii][j] = r[1 + j]

    if not c_api_timer:
        return results
    else:
        return results, api_time
def DDtheta_mocks(autocorr, nthreads, binfile,
                  RA1, DEC1, weights1=None,
                  RA2=None, DEC2=None, weights2=None,
                  link_in_dec=True, link_in_ra=True,
                  verbose=False, output_thetaavg=False,
                  fast_acos=False, ra_refine_factor=2,
                  dec_refine_factor=2, max_cells_per_dim=100,
                  c_api_timer=False, isa=r'fastest', weight_type=None):
    """
    Function to compute the angular correlation function for points on
    the sky (i.e., mock catalogs or observed galaxies).

    Returns a numpy structured array containing the pair counts for the
    specified angular bins.

    If ``weights`` are provided, the resulting pair counts are weighted. The
    weighting scheme depends on ``weight_type``.

    .. note:: This module only returns pair counts and not the actual
       correlation function :math:`\\omega(\\theta)`. See
       :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` for computing
       :math:`\\omega(\\theta)` from the pair counts returned.

    Parameters
    -----------

    autocorr : boolean, required
        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
        then the second set of particle positions are not required.

    nthreads : integer
        Number of threads to use.

    binfile: string or an list/array of floats. Units: degrees.
        For string input: filename specifying the ``theta`` bins for
        ``DDtheta_mocks``. The file should contain white-space separated
        values of (thetamin, thetamax) for each ``theta`` wanted. The bins
        need to be contiguous and sorted in increasing order (smallest bins
        come first).

        For array-like input: A sequence of ``theta`` values that provides
        the bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0
        degrees. This array does not need to be sorted.

    RA1 : array-like, real (float/double)
        The array of Right Ascensions for the first set of points. RA's
        are expected to be in [0.0, 360.0], but the code will try to fix cases
        where the RA's are in [-180, 180.0]. For peace of mind, always supply
        RA's in [0.0, 360.0].

        Calculations are done in the precision of the supplied arrays.

    DEC1 : array-like, real (float/double)
        Array of Declinations for the first set of points. DEC's are expected
        to be in the [-90.0, 90.0], but the code will try to fix cases where
        the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply
        DEC's in [-90.0, 90.0].

        Must be of same precision type as RA1.

    weights1 : array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions)
        or (n_positions,). `weight_type` specifies how these weights are used;
        results are returned in the `weightavg` field. If only one of
        weights1 and weights2 is specified, the other will be set to uniform
        weights.

    RA2 : array-like, real (float/double)
        The array of Right Ascensions for the second set of points. RA's
        are expected to be in [0.0, 360.0], but the code will try to fix cases
        where the RA's are in [-180, 180.0]. For peace of mind, always supply
        RA's in [0.0, 360.0].

        Must be of same precision type as RA1/DEC1.

    DEC2 : array-like, real (float/double)
        Array of Declinations for the second set of points. DEC's are expected
        to be in the [-90.0, 90.0], but the code will try to fix cases where
        the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply
        DEC's in [-90.0, 90.0].

        Must be of same precision type as RA1/DEC1.

    weights2 : array-like, real (float/double), optional
        Same as weights1, but for the second set of positions

    link_in_dec : boolean (default True)
        Boolean flag to create lattice in Declination. Code runs faster with
        this option. However, if the angular separations are too small, then
        linking in declination might produce incorrect results. When running
        for the first time, check your results by comparing with the output
        of the code for ``link_in_dec=False`` and ``link_in_ra=False``.

    link_in_ra : boolean (default True)
        Boolean flag to create lattice in Right Ascension. Setting this option
        implies ``link_in_dec=True``. Similar considerations as ``link_in_dec``
        described above.

        If you disable both ``link_in_dec`` and ``link_in_ra``, then
        the code reduces to a brute-force pair counter. No lattices are
        created at all. For very small angular separations, the brute-force
        method might be the most numerically stable method.

    verbose : boolean (default false)
        Boolean flag to control output of informational messages

    output_thetaavg : boolean (default false)
        Boolean flag to output the average ``\\theta`` for each bin. Code will
        run slower if you set this flag.

        If you are calculating in single-precision, ``thetaavg`` will
        suffer from numerical loss of precision and can not be trusted. If you
        need accurate ``thetaavg`` values, then pass in double precision
        arrays for ``RA/DEC``.

        Code will run significantly slower if you enable this option.
        Use the keyword ``fast_acos`` if you can tolerate some loss of
        precision.

    fast_acos : boolean (default false)
        Flag to use numerical approximation for the ``arccos`` - gives better
        performance at the expense of some precision. Relevant only if
        ``output_thetaavg==True``.

        Developers: Two versions already coded up in ``utils/fast_acos.h``,
        so you can choose the version you want. There are also notes on how
        to implement faster (and less accurate) functions, particularly
        relevant if you know your ``theta`` range is limited. If you implement
        a new version, then you will have to reinstall the entire Corrfunc
        package.

        Note: Tests will fail if you run the tests with ``fast_acos=True``.

    (radec)_refine_factor : integer, default is (2,2); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

        Only two refine factors are to be specified and these
        correspond to ``ra`` and ``dec`` (rather, than the usual three of
        ``(xyz)bin_refine_factor`` for all other correlation functions).

    max_cells_per_dim : integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if
        ``thetamax`` is too small relative to the boxsize (and increasing
        helps the runtime).

    c_api_timer : boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa : string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type : string, optional (default None)
        Selects the weighting scheme applied to ``weights1``/``weights2``.
        It is forwarded to the C extension only when it is not None. The
        example below uses ``'pair_product'``.

    Returns
    --------

    results : Numpy structured array
        A numpy structured array containing [thetamin, thetamax, thetaavg,
        npairs, weightavg] for each angular bin specified in the ``binfile``.
        If ``output_thetaavg`` is not set then ``thetaavg`` will be set to 0.0
        for all bins; similarly for ``weightavg``. ``npairs`` contains the
        number of pairs in that bin.

    api_time : float, optional
        Only returned if ``c_api_timer`` is set. ``api_time`` measures only
        the time spent within the C library and ignores all python overhead.

    Raises
    -------

    ImportError
        If the compiled C extension can not be imported.

    ValueError
        If ``autocorr == 0`` and either ``RA2`` or ``DEC2`` is None.

    RuntimeError
        If the C extension returns None.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> import time
    >>> from math import pi
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.mocks.DDtheta_mocks import DDtheta_mocks
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../mocks/tests/", "angular_bins")
    >>> N = 100000
    >>> nthreads = 4
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> RA = np.random.uniform(0.0, 2.0*pi, N)*180.0/pi
    >>> cos_theta = np.random.uniform(-1.0, 1.0, N)
    >>> DEC = 90.0 - np.arccos(cos_theta)*180.0/pi
    >>> weights = np.ones_like(RA)
    >>> autocorr = 1
    >>> for isa in ['AVX', 'SSE42', 'FALLBACK']:
    ...     for link_in_dec in [False, True]:
    ...         for link_in_ra in [False, True]:
    ...             results = DDtheta_mocks(autocorr, nthreads, binfile,
    ...                         RA, DEC, output_thetaavg=True,
    ...                         weights1=weights, weight_type='pair_product',
    ...                         link_in_dec=link_in_dec, link_in_ra=link_in_ra,
    ...                         isa=isa, verbose=True)
    >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}".
    ...                         format(r['thetamin'], r['thetamax'],
    ...                         r['thetaavg'], r['npairs'], r['weightavg']))
    ...                         # doctest: +NORMALIZE_WHITESPACE
      0.010000   0.014125   0.012272         62   1.000000
      0.014125   0.019953   0.016978        172   1.000000
      0.019953   0.028184   0.024380        298   1.000000
      0.028184   0.039811   0.034321        598   1.000000
      0.039811   0.056234   0.048535       1164   1.000000
      0.056234   0.079433   0.068385       2438   1.000000
      0.079433   0.112202   0.096631       4658   1.000000
      0.112202   0.158489   0.136834       9414   1.000000
      0.158489   0.223872   0.192967      19098   1.000000
      0.223872   0.316228   0.272673      37848   1.000000
      0.316228   0.446684   0.385344      75520   1.000000
      0.446684   0.630957   0.543973     150938   1.000000
      0.630957   0.891251   0.768406     301854   1.000000
      0.891251   1.258925   1.085273     599896   1.000000
      1.258925   1.778279   1.533461    1200238   1.000000
      1.778279   2.511886   2.166009    2396338   1.000000
      2.511886   3.548134   3.059159    4775162   1.000000
      3.548134   5.011872   4.321445    9532582   1.000000
      5.011872   7.079458   6.104214   19001930   1.000000
      7.079458  10.000000   8.622400   37842502   1.000000

    """
    try:
        from Corrfunc._countpairs_mocks import countpairs_theta_mocks as\
            DDtheta_mocks_extn
    except ImportError:
        msg = "Could not import the C extension for the angular "\
              "correlation function for mocks."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from Corrfunc.utils import translate_isa_string_to_enum, fix_ra_dec,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian, sys_pipes
    from future.utils import bytes_to_native_str

    # Broadcast scalar weights to arrays
    if weights1 is not None:
        weights1 = np.atleast_1d(weights1)
    if weights2 is not None:
        weights2 = np.atleast_1d(weights2)

    if autocorr == 0:
        if RA2 is None or DEC2 is None:
            msg = "Must pass valid arrays for RA2/DEC2 for "\
                  "computing cross-correlation"
            raise ValueError(msg)

        # If only one set of points has weights, set the other to uniform
        # weights
        if weights1 is None and weights2 is not None:
            weights1 = np.ones_like(weights2)
        if weights2 is None and weights1 is not None:
            weights2 = np.ones_like(weights1)
    else:
        # Auto-correlation: any RA2/DEC2 passed in is replaced by
        # one-element placeholder arrays before being forwarded.
        RA2 = np.empty(1)
        DEC2 = np.empty(1)

    # Warn about non-native endian arrays
    inputs = [RA1, DEC1, weights1, RA2, DEC2, weights2]
    if not all(is_native_endian(arr) for arr in inputs):
        warn('One or more input array has non-native endianness! A copy will be made with the correct endianness.')
    RA1, DEC1, weights1, RA2, DEC2, weights2 = [
        convert_to_native_endian(arr) for arr in inputs]

    # NOTE(review): the return values are ignored here, so this relies on
    # fix_ra_dec adjusting the arrays in place -- confirm against
    # Corrfunc.utils.fix_ra_dec.
    fix_ra_dec(RA1, DEC1)
    if autocorr == 0:
        fix_ra_dec(RA2, DEC2)

    # Linking in RA implies linking in DEC (see the docstring).
    if link_in_ra is True:
        link_in_dec = True

    # Passing None parameters breaks the parsing code, so avoid this
    optional_args = (('weights1', weights1), ('weights2', weights2),
                     ('weight_type', weight_type),
                     ('RA2', RA2), ('DEC2', DEC2))
    kwargs = {}
    for key, value in optional_args:
        if value is not None:
            kwargs[key] = value

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        with sys_pipes():
            extn_results = DDtheta_mocks_extn(
                autocorr, nthreads, rbinfile,
                RA1, DEC1,
                verbose=verbose,
                link_in_dec=link_in_dec,
                link_in_ra=link_in_ra,
                output_thetaavg=output_thetaavg,
                fast_acos=fast_acos,
                ra_refine_factor=ra_refine_factor,
                dec_refine_factor=dec_refine_factor,
                max_cells_per_dim=max_cells_per_dim,
                c_api_timer=c_api_timer,
                isa=integer_isa, **kwargs)
    finally:
        # Remove the bin file on every exit path, including when the
        # extension raises or reports a failure, so that it is not leaked.
        if delete_after_use:
            os.remove(rbinfile)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)
    extn_results, api_time = extn_results

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same double-precision type.
    results_dtype = np.dtype([(bytes_to_native_str(b'thetamin'), np.float64),
                              (bytes_to_native_str(b'thetamax'), np.float64),
                              (bytes_to_native_str(b'thetaavg'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'), np.float64)])
    results = np.array(extn_results, dtype=results_dtype)

    if not c_api_timer:
        return results
    return results, api_time
def vpf_mocks(rmax, nbins, nspheres, numpN,
              threshold_ngb, centers_file, cosmology,
              RA, DEC, CZ,
              RAND_RA, RAND_DEC, RAND_CZ,
              verbose=False, is_comoving_dist=False,
              xbin_refine_factor=1, ybin_refine_factor=1,
              zbin_refine_factor=1, max_cells_per_dim=100,
              c_api_timer=False, isa=r'fastest'):
    """
    Function to compute the counts-in-cells on points on the sky. Suitable
    for mock catalogs and observed galaxies.

    Returns a numpy structured array containing the probability of a
    sphere of radius up to ``rmax`` containing ``0--numpN-1`` galaxies.

    Parameters
    ----------

    rmax : double
        Maximum radius of the sphere to place on the particles

    nbins : integer
        Number of bins in the counts-in-cells. Radius of first shell
        is rmax/nbins

    nspheres: integer (>= 0)
        Number of random spheres to place within the particle distribution.
        For a small number of spheres, the error is larger in the measured
        pN's.

    numpN: integer (>= 1)
        Governs how many unique pN's are to be returned. If ``numpN`` is set
        to 1, then only the vpf (p0) is returned. For ``numpN=2``, p0 and p1
        are returned.

        More explicitly, the columns in the results look like the following:

        ======   ==========================
        numpN    Columns in output
        ======   ==========================
           1     p0
           2     p0      p1
           3     p0      p1     p2
           4     p0      p1     p2     p3
        ======   ==========================

        and so on...

        .. note:: p0 is the vpf

    threshold_ngb: integer
        Minimum number of random points needed in a ``rmax`` sphere such that
        it is considered to be entirely within the mock footprint. The
        command-line version, ``mocks/vpf/vpf_mocks.c``, assumes that the
        minimum number of randoms can be at most a 1-sigma deviation from
        the expected random number density.

    centers_file: string, filename
        A file containing random sphere centers. If the file does not exist,
        then a list of random centers will be written out. In that case, the
        randoms arrays, ``RAND_RA``, ``RAND_DEC`` and ``RAND_CZ`` are used to
        check that the sphere is entirely within the footprint. If the file
        does exist but either ``rmax`` is too small or there are not enough
        centers then the file will be overwritten.

        .. note:: If the centers file has to be written, the code will take
           significantly longer to finish. However, subsequent runs can re-use
           that centers file and will be faster.

    cosmology: integer, required
        Integer choice for setting cosmology. Valid values are 1->LasDamas
        cosmology and 2->Planck cosmology. If you need arbitrary cosmology,
        easiest way is to convert the ``CZ`` values into co-moving distance,
        based on your preferred cosmology. Set ``is_comoving_dist=True``, to
        indicate that the co-moving distance conversion has already been done.

        Choices:
                 1. LasDamas cosmology. :math:`\\Omega_m=0.25`, :math:`\\Omega_\\Lambda=0.75`
                 2. Planck cosmology. :math:`\\Omega_m=0.302`, :math:`\\Omega_\\Lambda=0.698`

        To setup a new cosmology, add an entry to the function,
        ``init_cosmology`` in ``ROOT/utils/cosmology_params.c`` and re-install
        the entire package.

    RA: array-like, real (float/double)
        The array of Right Ascensions for the first set of points. RA's
        are expected to be in [0.0, 360.0], but the code will try to fix cases
        where the RA's are in [-180, 180.0]. For peace of mind, always supply
        RA's in [0.0, 360.0].

        Calculations are done in the precision of the supplied arrays.

    DEC: array-like, real (float/double)
        Array of Declinations for the first set of points. DEC's are expected
        to be in the [-90.0, 90.0], but the code will try to fix cases where
        the DEC's are in [0.0, 180.0]. Again, for peace of mind, always supply
        DEC's in [-90.0, 90.0].

        Must be of same precision type as RA.

    CZ: array-like, real (float/double)
        Array of (Speed Of Light * Redshift) values for the first set of
        points. Code will try to detect cases where ``redshifts`` have been
        passed and multiply the entire array with the ``speed of light``.

        If ``is_comoving_dist`` is set, then ``CZ`` is interpreted as the
        co-moving distance, rather than (Speed Of Light * Redshift).

    RAND_RA: array-like, real (float/double)
        The array of Right Ascensions for the randoms. RA's are expected to be
        in [0.0, 360.0], but the code will try to fix cases where the RA's are
        in [-180, 180.0]. For peace of mind, always supply RA's in
        [0.0, 360.0].

        Must be of same precision type as RA/DEC/CZ.

    RAND_DEC: array-like, real (float/double)
        Array of Declinations for the randoms. DEC's are expected to be in the
        [-90.0, 90.0], but the code will try to fix cases where the DEC's are
        in [0.0, 180.0]. Again, for peace of mind, always supply DEC's in
        [-90.0, 90.0].

        Must be of same precision type as RA/DEC/CZ.

    RAND_CZ: array-like, real (float/double)
        Array of (Speed Of Light * Redshift) values for the randoms. Code
        will try to detect cases where ``redshifts`` have been
        passed and multiply the entire array with the ``speed of light``.

        If ``is_comoving_dist`` is set, then ``RAND_CZ`` is interpreted as the
        co-moving distance, rather than ``(Speed Of Light * Redshift)``.

        .. note:: RAND_RA, RAND_DEC and RAND_CZ are only used when the
           ``centers_file`` needs to be written out. In that case, the
           RAND_RA, RAND_DEC, and RAND_CZ are used as random centers.

    verbose: boolean (default false)
        Boolean flag to control output of informational messages

    is_comoving_dist: boolean (default false)
        Boolean flag to indicate that ``cz`` values have already been
        converted into co-moving distances. This flag allows arbitrary
        cosmologies to be used in ``Corrfunc``.

    (xyz)bin_refine_factor: integer, default is (1,1,1); typically within [1-3]
        Controls the refinement on the cell sizes. Can have up to a 20% impact
        on runtime.

        .. note:: Since the counts in spheres calculation is symmetric
           in all 3 dimensions, the defaults are different from the clustering
           routines.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
        Controls the maximum number of cells per dimension. Total number of
        cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
        too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
        Boolean flag to measure actual time spent in the C libraries. Here
        to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
        Controls the runtime dispatch for the instruction set to use. Possible
        options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

        Setting isa to ``fastest`` will pick the fastest available instruction
        set on the current computer. However, if you set ``isa`` to, say,
        ``avx`` and ``avx`` is not available on the computer, then the code
        will revert to using ``fallback`` (even though ``sse42`` might be
        available).

        Unless you are benchmarking the different instruction sets, you should
        always leave ``isa`` to the default value. And if you *are*
        benchmarking, then the string supplied here gets translated into an
        ``enum`` for the instruction set defined in ``utils/defs.h``.

    Returns
    --------

    results: Numpy structured array
        A numpy structured array containing [rmax, pN[numpN]] with ``nbins``
        elements. Each row contains the maximum radius of the sphere and the
        ``numpN`` elements in the ``pN`` array. Each element of this array
        contains the probability that a sphere of radius ``rmax`` contains
        *exactly* ``N`` galaxies. For example, pN[0] (p0, the void probability
        function) is the probability that a sphere of radius ``rmax`` contains
        0 galaxies.

        if ``c_api_timer`` is set, then the return value is a tuple containing
        (results, api_time). ``api_time`` measures only the time spent within
        the C library and ignores all python overhead.

    Raises
    -------

    ImportError
        If the compiled C extension can not be imported.

    RuntimeError
        If the C extension returns None.

    Example
    --------

    >>> from __future__ import print_function
    >>> import math
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import numpy as np
    >>> import Corrfunc
    >>> from Corrfunc.mocks.vpf_mocks import vpf_mocks
    >>> rmax = 10.0
    >>> nbins = 10
    >>> numbins_to_print = nbins
    >>> nspheres = 10000
    >>> numpN = 6
    >>> threshold_ngb = 1  # does not matter since we have the centers
    >>> cosmology = 1  # LasDamas cosmology
    >>> centers_file = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                      "../mocks/tests/data/",
    ...                      "Mr19_centers_xyz_forVPF_rmax_10Mpc.txt")
    >>> N = 1000000
    >>> boxsize = 420.0
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> Y = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> Z = np.random.uniform(-0.5*boxsize, 0.5*boxsize, N)
    >>> CZ = np.sqrt(X*X + Y*Y + Z*Z)
    >>> inv_cz = 1.0/CZ
    >>> X *= inv_cz
    >>> Y *= inv_cz
    >>> Z *= inv_cz
    >>> DEC = 90.0 - np.arccos(Z)*180.0/math.pi
    >>> RA = (np.arctan2(Y, X)*180.0/math.pi) + 180.0
    >>> results = vpf_mocks(rmax, nbins, nspheres, numpN, threshold_ngb,
    ...                     centers_file, cosmology,
    ...                     RA, DEC, CZ,
    ...                     RA, DEC, CZ,
    ...                     is_comoving_dist=True)
    >>> for r in results:
    ...     print("{0:10.1f} ".format(r[0]), end="")
    ...     # doctest: +NORMALIZE_WHITESPACE
    ...     for pn in r[1]:
    ...         print("{0:10.3f} ".format(pn), end="")
    ...         # doctest: +NORMALIZE_WHITESPACE
    ...     print("") # doctest: +NORMALIZE_WHITESPACE
    1.0      0.999      0.001      0.000      0.000      0.000      0.000
    2.0      0.992      0.007      0.001      0.000      0.000      0.000
    3.0      0.982      0.009      0.005      0.002      0.001      0.000
    4.0      0.975      0.006      0.006      0.005      0.003      0.003
    5.0      0.971      0.004      0.003      0.003      0.004      0.003
    6.0      0.967      0.003      0.003      0.001      0.003      0.002
    7.0      0.962      0.004      0.002      0.003      0.002      0.001
    8.0      0.958      0.004      0.002      0.003      0.001      0.002
    9.0      0.953      0.003      0.003      0.002      0.003      0.001
    10.0     0.950      0.003      0.002      0.002      0.001      0.002

    """
    try:
        from Corrfunc._countpairs_mocks import countspheres_vpf_mocks\
            as vpf_extn
    except ImportError:
        msg = "Could not import the C extension for the Counts-in-Cells "\
              " (vpf)"
        raise ImportError(msg)

    import numpy as np
    from warnings import warn
    from future.utils import bytes_to_native_str
    from Corrfunc.utils import translate_isa_string_to_enum,\
        convert_to_native_endian, is_native_endian

    # Warn about non-native endian arrays
    inputs = [RA, DEC, CZ, RAND_RA, RAND_DEC, RAND_CZ]
    if not all(is_native_endian(arr) for arr in inputs):
        warn('One or more input array has non-native endianness! A copy will be made with the correct endianness.')
    # Iterate over an explicit list: an unparenthesised tuple after ``in``
    # inside a comprehension is a SyntaxError on Python 3.
    RA, DEC, CZ, RAND_RA, RAND_DEC, RAND_CZ = [
        convert_to_native_endian(arr) for arr in inputs]

    integer_isa = translate_isa_string_to_enum(isa)
    extn_output = vpf_extn(rmax, nbins, nspheres, numpN,
                           threshold_ngb, centers_file,
                           cosmology,
                           RA, DEC, CZ,
                           RAND_RA, RAND_DEC, RAND_CZ,
                           verbose=verbose,
                           is_comoving_dist=is_comoving_dist,
                           xbin_refine_factor=xbin_refine_factor,
                           ybin_refine_factor=ybin_refine_factor,
                           zbin_refine_factor=zbin_refine_factor,
                           max_cells_per_dim=max_cells_per_dim,
                           c_api_timer=c_api_timer,
                           isa=integer_isa)

    # Check for failure *before* unpacking; unpacking a None return would
    # raise TypeError and hide the intended RuntimeError.
    if extn_output is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)
    extn_results, api_time = extn_output
    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` was only an alias for the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` is the same double-precision type.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'pN'),
                               (np.float64, numpN))])
    nbin = len(extn_results)
    results = np.zeros(nbin, dtype=results_dtype)

    # Each extension row is indexed as (rmax, p0, p1, ..., p[numpN-1]).
    for ii, r in enumerate(extn_results):
        results['rmax'][ii] = r[0]
        if numpN == 1:
            # Write only row ``ii``. Assigning to the whole 'pN' column here
            # would overwrite every row with the current bin's value.
            results['pN'][ii] = r[1]
        else:
            for j in range(numpN):
                results['pN'][ii][j] = r[1 + j]

    if not c_api_timer:
        return results
    return results, api_time