Exemplo n.º 1
0
def DD(autocorr, nthreads, binfile, X1, Y1, Z1, weights1=None, periodic=True,
       X2=None, Y2=None, Z2=None, weights2=None, verbose=False, boxsize=0.0,
       output_ravg=False, xbin_refine_factor=2, ybin_refine_factor=2,
       zbin_refine_factor=1, max_cells_per_dim=100,
       c_api_timer=False, isa=r'fastest', weight_type=None):
    """
    Calculate the 3-D pair-counts corresponding to the real-space correlation
    function, :math:`\\xi(r)`.

    If ``weights`` are provided, the resulting pair counts are weighted.  The
    weighting scheme depends on ``weight_type``.


    .. note:: This module only returns pair counts and not the actual
       correlation function :math:`\\xi(r)`. See
       :py:mod:`Corrfunc.utils.convert_3d_counts_to_cf` for computing
       :math:`\\xi(r)` from the pair counts returned.


    Parameters
    -----------

    autocorr: boolean, required
        Boolean flag for auto/cross-correlation. If autocorr is set to 1,
        then the second set of particle positions are not required.

    nthreads: integer
        The number of OpenMP threads to use. Has no effect if OpenMP was not
        enabled during library compilation.

    binfile: string or a list/array of floats
        For string input: filename specifying the ``r`` bins for
        ``DD``. The file should contain white-space separated values
        of (rmin, rmax)  for each ``r`` wanted. The bins need to be
        contiguous and sorted in increasing order (smallest bins come first).

        For array-like input: A sequence of ``r`` values that provides the
        bin-edges. For example,
        ``np.logspace(np.log10(0.1), np.log10(10.0), 15)`` is a valid
        input specifying **14** (logarithmic) bins between 0.1 and 10.0. This
        array does not need to be sorted.

    X1/Y1/Z1: array_like, real (float/double)
        The array of X/Y/Z positions for the first set of points.
        Calculations are done in the precision of the supplied arrays.

    weights1: array_like, real (float/double), optional
        A scalar, or an array of weights of shape (n_weights, n_positions) or (n_positions,).
        `weight_type` specifies how these weights are used; results are returned
        in the `weightavg` field.  If only one of weights1 and weights2 is
        specified, the other will be set to uniform weights.

    periodic: boolean
       Boolean flag to indicate periodic boundary conditions.

    X2/Y2/Z2: array-like, real (float/double)
       Array of XYZ positions for the second set of points. *Must* be the same
       precision as the X1/Y1/Z1 arrays. Only required when ``autocorr==0``.

    weights2: array-like, real (float/double), optional
        Same as weights1, but for the second set of positions

    verbose: boolean (default false)
       Boolean flag to control output of informational messages

    boxsize: double
        The side-length of the cube in the cosmological simulation.
        Present to facilitate exact calculations for periodic wrapping.
        If boxsize is not supplied, then the wrapping is done based on
        the maximum difference within each dimension of the X/Y/Z arrays.

    output_ravg: boolean (default false)
       Boolean flag to output the average ``r`` for each bin. Code will
       run slower if you set this flag.

       Note: If you are calculating in single-precision, ``ravg`` will
       suffer from numerical loss of precision and can not be trusted.
       If you need accurate ``ravg`` values, then pass in double precision
       arrays for the particle positions.


    (xyz)bin_refine_factor: integer, default is (2,2,1); typically within [1-3]
       Controls the refinement on the cell sizes. Can have up to a 20% impact
       on runtime.

    max_cells_per_dim: integer, default is 100, typical values in [50-300]
       Controls the maximum number of cells per dimension. Total number of
       cells can be up to (max_cells_per_dim)^3. Only increase if ``rmax`` is
       too small relative to the boxsize (and increasing helps the runtime).

    c_api_timer: boolean (default false)
       Boolean flag to measure actual time spent in the C libraries. Here
       to allow for benchmarking and scaling studies.

    isa: string (default ``fastest``)
       Controls the runtime dispatch for the instruction set to use. Possible
       options are: [``fastest``, ``avx``, ``sse42``, ``fallback``]

       Setting isa to ``fastest`` will pick the fastest available instruction
       set on the current computer. However, if you set ``isa`` to, say,
       ``avx`` and ``avx`` is not available on the computer, then the code will
       revert to using ``fallback`` (even though ``sse42`` might be available).

       Unless you are benchmarking the different instruction sets, you should
       always leave ``isa`` to the default value. And if you *are*
       benchmarking, then the string supplied here gets translated into an
       ``enum`` for the instruction set defined in ``utils/defs.h``.

    weight_type: string, optional
        The type of weighting to apply.  One of ["pair_product", None].  Default: None.

    Returns
    --------

    results: Numpy structured array
       A numpy structured array containing [rmin, rmax, ravg, npairs, weightavg]
       for each radial bin specified in the ``binfile``. If ``output_ravg`` is
       not set, then ``ravg`` will be set to 0.0 for all bins; similarly for
       ``weightavg``. ``npairs`` contains the number of pairs in that bin and can
       be used to compute the actual :math:`\\xi(r)` by combining with (DR, RR) counts.

    api_time: float, optional
       Only returned if ``c_api_timer`` is set.  ``api_time`` measures only the time
       spent within the C library and ignores all python overhead.

    Raises
    -------

    ImportError
       If the compiled pair-counting extension cannot be imported.

    ValueError
       If ``autocorr`` is false and any of X2/Y2/Z2 is missing.

    RuntimeError
       If the extension returns no results.

    Example
    --------

    >>> from __future__ import print_function
    >>> import numpy as np
    >>> from os.path import dirname, abspath, join as pjoin
    >>> import Corrfunc
    >>> from Corrfunc.theory.DD import DD
    >>> binfile = pjoin(dirname(abspath(Corrfunc.__file__)),
    ...                 "../theory/tests/", "bins")
    >>> N = 10000
    >>> boxsize = 420.0
    >>> nthreads = 4
    >>> autocorr = 1
    >>> seed = 42
    >>> np.random.seed(seed)
    >>> X = np.random.uniform(0, boxsize, N)
    >>> Y = np.random.uniform(0, boxsize, N)
    >>> Z = np.random.uniform(0, boxsize, N)
    >>> weights = np.ones_like(X)
    >>> results = DD(autocorr, nthreads, binfile, X, Y, Z, weights1=weights, weight_type='pair_product', output_ravg=True)
    >>> for r in results: print("{0:10.6f} {1:10.6f} {2:10.6f} {3:10d} {4:10.6f}".
    ...                         format(r['rmin'], r['rmax'], r['ravg'],
    ...                         r['npairs'], r['weightavg'])) # doctest: +NORMALIZE_WHITESPACE
      0.167536   0.238755   0.000000          0   0.000000
      0.238755   0.340251   0.000000          0   0.000000
      0.340251   0.484892   0.000000          0   0.000000
      0.484892   0.691021   0.000000          0   0.000000
      0.691021   0.984777   0.945372          2   1.000000
      0.984777   1.403410   1.340525         10   1.000000
      1.403410   2.000000   1.732968         36   1.000000
      2.000000   2.850200   2.558878         54   1.000000
      2.850200   4.061840   3.564959        208   1.000000
      4.061840   5.788530   4.999278        674   1.000000
      5.788530   8.249250   7.126673       2154   1.000000
      8.249250  11.756000  10.201834       5996   1.000000
     11.756000  16.753600  14.517830      17746   1.000000
     16.753600  23.875500  20.716017      50252   1.000000

    """
    try:
        from Corrfunc._countpairs import countpairs as DD_extn
    except ImportError:
        msg = "Could not import the C extension for the 3-D "\
              "real-space pair counter."
        raise ImportError(msg)

    import os
    import numpy as np
    from warnings import warn
    from Corrfunc.utils import translate_isa_string_to_enum,\
        return_file_with_rbins, convert_to_native_endian,\
        is_native_endian
    from future.utils import bytes_to_native_str

    # Promote scalar weights to (at least) 1-D arrays
    if weights1 is not None:
        weights1 = np.atleast_1d(weights1)
    if weights2 is not None:
        weights2 = np.atleast_1d(weights2)

    if not autocorr:
        if X2 is None or Y2 is None or Z2 is None:
            msg = "Must pass valid arrays for X2/Y2/Z2 for "\
                  "computing cross-correlation"
            raise ValueError(msg)

        # If only one set of points has weights, give the other set uniform
        # weights.  The last axis of a weights array is n_positions, so the
        # filled-in array is sized to the point set it belongs to; the two
        # sets may hold different numbers of points.
        if weights1 is None and weights2 is not None:
            weights1 = np.ones(weights2.shape[:-1] + (len(X1),),
                               dtype=weights2.dtype)
        if weights2 is None and weights1 is not None:
            weights2 = np.ones(weights1.shape[:-1] + (len(X2),),
                               dtype=weights1.dtype)

    # Warn about non-native endian arrays
    arrays = [X1, Y1, Z1, weights1, X2, Y2, Z2, weights2]
    if not all(is_native_endian(arr) for arr in arrays):
        warn('One or more input array has non-native endianness!  A copy will be made with the correct endianness.')
    X1, Y1, Z1, weights1, X2, Y2, Z2, weights2 = [
        convert_to_native_endian(arr) for arr in arrays]

    # Passing None parameters breaks the parsing code, so only forward the
    # optional arguments that were actually supplied.
    optional_args = (('weights1', weights1), ('weights2', weights2),
                     ('weight_type', weight_type),
                     ('X2', X2), ('Y2', Y2), ('Z2', Z2))
    kwargs = {name: value for name, value in optional_args
              if value is not None}

    integer_isa = translate_isa_string_to_enum(isa)
    rbinfile, delete_after_use = return_file_with_rbins(binfile)
    try:
        extn_results, api_time = DD_extn(
            autocorr, nthreads, rbinfile,
            X1, Y1, Z1,
            periodic=periodic,
            verbose=verbose,
            boxsize=boxsize,
            output_ravg=output_ravg,
            xbin_refine_factor=xbin_refine_factor,
            ybin_refine_factor=ybin_refine_factor,
            zbin_refine_factor=zbin_refine_factor,
            max_cells_per_dim=max_cells_per_dim,
            c_api_timer=c_api_timer,
            isa=integer_isa, **kwargs)
    finally:
        # Remove the temporary bin file even when the extension call fails,
        # so an error does not leave the file behind.
        if delete_after_use:
            os.remove(rbinfile)

    if extn_results is None:
        msg = "RuntimeError occurred"
        raise RuntimeError(msg)

    # ``np.float`` (an alias of the builtin ``float``) was removed from NumPy;
    # ``np.float64`` is the dtype it resolved to.
    results_dtype = np.dtype([(bytes_to_native_str(b'rmin'), np.float64),
                              (bytes_to_native_str(b'rmax'), np.float64),
                              (bytes_to_native_str(b'ravg'), np.float64),
                              (bytes_to_native_str(b'npairs'), np.uint64),
                              (bytes_to_native_str(b'weightavg'), np.float64)])
    results = np.array(extn_results, dtype=results_dtype)
    if c_api_timer:
        return results, api_time
    return results
Exemplo n.º 2
0
def _print_leading_bins(results, max_rows, row_format, nfields):
    """Print up to ``max_rows`` leading rows of ``results``.

    Each row is rendered by passing its first ``nfields`` entries to
    ``row_format.format``.  The row count is clamped to ``len(results)`` so a
    result with fewer rows than requested is printed in full instead of
    raising ``IndexError``.
    """
    for ibin in range(min(max_rows, len(results))):
        items = results[ibin]
        print(row_format.format(*[items[col] for col in range(nfields)]))


def main():
    """Run each pair counter on the catalog and print the leading bins.

    Reads positions with ``read_catalog``, then calls the DD(r), DD(rp,pi),
    wp(rp), xi(r) and VPF extension functions in turn, printing the first
    ``numbins_to_print`` rows of every result followed by timing totals.
    """
    tstart = time.time()
    t0 = tstart
    x, y, z = read_catalog()
    boxsize = 420.0
    t1 = time.time()
    print("Done reading the data - time taken = {0:10.1f} seconds".format(t1 -
                                                                          t0))

    numbins_to_print = 5

    print("Beginning Theory Correlation functions calculations")
    nthreads = 4
    pimax = 40.0
    binfile = pjoin(dirname(abspath(Corrfunc.__file__)), "../theory/tests/",
                    "bins")
    autocorr = 1
    periodic = 1

    # Uniform weights, built once and shared by every weighted call below.
    weights = np.ones_like(x)

    # Row layouts: DD(r) rows have five columns, the other tables have six.
    five_col_row = "{0:12.4f} {1:12.4f} {2:10.4f} {3:10d} {4:10.4f}"
    six_col_row = "{0:12.4f} {1:12.4f} {2:10.4f} {3:10.1f} {4:10d} {5:10.4f}"

    print("Running 3-D correlation function DD(r)")
    results_DD, _ = DD_extn(autocorr,
                            nthreads,
                            binfile,
                            x,
                            y,
                            z,
                            weights1=weights,
                            weight_type='pair_product',
                            verbose=True,
                            periodic=periodic,
                            boxsize=boxsize)
    print("\n#      **** DD(r): first {0} bins  *******       ".format(
        numbins_to_print))
    print("#      rmin        rmax       rpavg       npairs    weightavg")
    print("#############################################################")
    _print_leading_bins(results_DD, numbins_to_print, five_col_row, 5)
    print("-------------------------------------------------------------")

    print("\nRunning 2-D correlation function DD(rp,pi)")
    results_DDrppi, _ = DDrppi_extn(autocorr,
                                    nthreads,
                                    pimax,
                                    binfile,
                                    x,
                                    y,
                                    z,
                                    weights1=weights,
                                    weight_type='pair_product',
                                    verbose=True,
                                    periodic=periodic,
                                    boxsize=boxsize)
    print("\n#            ****** DD(rp,pi): first {0} bins  *******      ".
          format(numbins_to_print))
    print(
        "#      rmin        rmax       rpavg     pi_upper     npairs    weightavg"
    )
    print(
        "########################################################################"
    )
    _print_leading_bins(results_DDrppi, numbins_to_print, six_col_row, 6)
    print(
        "------------------------------------------------------------------------"
    )

    print("\nRunning 2-D projected correlation function wp(rp)")
    results_wp, _, _ = wp_extn(boxsize,
                               pimax,
                               nthreads,
                               binfile,
                               x,
                               y,
                               z,
                               weights=weights,
                               weight_type='pair_product',
                               verbose=True)
    print(
        "\n#            ******    wp: first {0} bins  *******         ".format(
            numbins_to_print))
    print(
        "#      rmin        rmax       rpavg        wp       npairs    weightavg"
    )
    print(
        "#######################################################################"
    )
    _print_leading_bins(results_wp, numbins_to_print, six_col_row, 6)
    print(
        "-----------------------------------------------------------------------"
    )

    print("\nRunning 3-D auto-correlation function xi(r)")
    results_xi, _ = xi_extn(boxsize,
                            nthreads,
                            binfile,
                            x,
                            y,
                            z,
                            weights=weights,
                            weight_type='pair_product',
                            verbose=True)

    print(
        "\n#            ******    xi: first {0} bins  *******         ".format(
            numbins_to_print))
    print(
        "#      rmin        rmax       rpavg        xi       npairs    weightavg"
    )
    print(
        "#######################################################################"
    )
    _print_leading_bins(results_xi, numbins_to_print, six_col_row, 6)
    print(
        "-----------------------------------------------------------------------"
    )
    print("Done with all four correlation calculations.")

    print("\nRunning VPF pN(r)")
    rmax = 10.0
    nbin = 10
    nspheres = 10000
    num_pN = 3
    seed = -1
    results_vpf, _ = vpf_extn(rmax,
                              nbin,
                              nspheres,
                              num_pN,
                              seed,
                              x,
                              y,
                              z,
                              verbose=True,
                              periodic=periodic,
                              boxsize=boxsize)

    print(
        "\n#            ******    pN: first {0} bins  *******         ".format(
            numbins_to_print))
    print('#       r    ', end="")

    for ipn in range(num_pN):
        print('        p{0:0d}      '.format(ipn), end="")

    print("")

    print("###########", end="")
    for ipn in range(num_pN):
        print('################', end="")
    print("")

    # Each VPF row is a radius followed by num_pN probabilities, written
    # piecewise on one line; clamp the row count as for the other tables.
    for ibin in range(min(numbins_to_print, len(results_vpf))):
        items = results_vpf[ibin]
        print('{0:10.2f} '.format(items[0]), end="")
        for ipn in range(num_pN):
            print(' {0:15.4e}'.format(items[ipn + 1]), end="")
        print("")

    print("-----------------------------------------------------------")

    tend = time.time()
    # Adjacent literals instead of a backslash continuation inside the string,
    # which leaked the source indentation into the printed message.
    print("Done with all functions. Total time taken = {0:10.1f} seconds. "
          "Read-in time = {1:10.1f} seconds.".format(tend - tstart, t1 - t0))