Example #1
File: fkp.py Project: twobombs/nbodykit
    def __setstate_pre000305__(self, state):
        """ compatible version of setstate for files generated before 0.3.5 """
        edges = state['edges']
        self.attrs = state['attrs']
        self.poles = BinnedStatistic(['k'], [edges],
                                     state['poles'],
                                     fields_to_sum=['modes'])
Example #2
    def _make_datasets(self, edges, poles, power, coords, attrs):

        if self.attrs['mode'] == '1d':
            power = BinnedStatistic(['k'],
                                    edges,
                                    power,
                                    fields_to_sum=['modes'],
                                    coords=coords,
                                    **attrs)
        else:
            power = BinnedStatistic(['k', 'mu'],
                                    edges,
                                    power,
                                    fields_to_sum=['modes'],
                                    coords=coords,
                                    **attrs)

        if poles is not None:
            poles = BinnedStatistic(['k'], [power.edges['k']],
                                    poles,
                                    fields_to_sum=['modes'],
                                    coords=[power.coords['k']],
                                    **attrs)

        return power, poles
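
For orientation, here is a minimal standalone sketch of the constructor call that helpers like _make_datasets wrap: dims and edges are parallel lists with one entry per dimension, data is a structured array whose shape matches the bin counts, and fields_to_sum names the variables that are summed rather than averaged when bins are combined (the numbers below are synthetic, not from nbodykit's test data):

import numpy
from nbodykit.binned_statistic import BinnedStatistic

# 10 hypothetical k bins between 0.01 and 0.21 h/Mpc
edges = numpy.linspace(0.01, 0.21, 11)

# one named field per variable; the array shape matches the bin counts
data = numpy.zeros(10, dtype=[('k', 'f8'), ('power', 'c16'), ('modes', 'f8')])
data['k'] = 0.5 * (edges[1:] + edges[:-1])  # bin centers as the mean wavenumbers
data['modes'] = 1.0

# 'modes' is summed (not averaged) when bins are merged, as in the examples
ds = BinnedStatistic(['k'], [edges], data, fields_to_sum=['modes'])
print(ds.dims, ds.shape)  # ['k'] (10,)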
Example #3
def test_2d_load(comm):

    # load plaintext format
    with pytest.warns(FutureWarning):
        ds1 = BinnedStatistic.from_plaintext(['k', 'mu'], os.path.join(data_dir, 'dataset_2d_deprecated.dat'))

    # load from JSON
    ds2 = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # same data?
    for name in ds1:
        testing.assert_almost_equal(ds1[name], ds2[name])
Example #4
    def __setstate__(self, state):

        edges = state.pop('edges')
        dims = state.pop('dims')
        self.__dict__.update(state)

        self.corr = BinnedStatistic(dims, edges, self.corr)
        if self.wp is not None:
            # NOTE: only edges[0], second dimension was summed over
            self.wp = BinnedStatistic(dims[:1], edges[:1], self.wp)

        for pc in ['D1D2', 'D1R2', 'D2R1', 'R1R2']:
            val = getattr(self, pc)
            if val is not None:
                setattr(self, pc, BinnedStatistic(dims, edges, val))
Example #5
    def run(self):
        """
        Compute the power spectrum multipoles. This function does not return
        anything, but adds several attributes (see below).

        Attributes
        ----------
        edges : array_like
            the edges of the wavenumber bins
        poles : :class:`~nbodykit.binned_statistic.BinnedStatistic`
            a BinnedStatistic object that behaves similarly to a structured array, with
            fancy slicing and re-indexing; it holds the measured multipole
            results, as well as the number of modes (``modes``) and average
            wavenumber values in each bin (``k``)
        attrs : dict
            dictionary holding input parameters and several important quantities
            computed during execution:

            #. data.N, randoms.N :
                the unweighted number of data and randoms objects
            #. data.W, randoms.W :
                the weighted number of data and randoms objects, using the
                column specified as the completeness weights
            #. alpha :
                the ratio of ``data.W`` to ``randoms.W``
            #. data.norm, randoms.norm :
                the normalization of the power spectrum, computed from either
                the "data" or "randoms" catalog (they should be similar).
                See equations 13 and 14 of arxiv:1312.4611.
            #. data.shotnoise, randoms.shotnoise :
                the shot noise values for the "data" and "randoms" catalogs;
                See equation 15 of arxiv:1312.4611.
            #. shotnoise :
                the total shot noise for the power spectrum, equal to
                ``data.shotnoise`` + ``randoms.shotnoise``; this should be subtracted from
                the monopole.
            #. BoxSize :
                the size of the Cartesian box used to grid the data and
                randoms objects on a Cartesian mesh.

            For further details on the meta-data, see
            :ref:`the documentation <fkp-meta-data>`.
        """
        pm = self.first.pm

        # setup the binning in k out to the minimum nyquist frequency
        dk = 2 * numpy.pi / pm.BoxSize.min(
        ) if self.attrs['dk'] is None else self.attrs['dk']
        self.edges = numpy.arange(
            self.attrs['kmin'],
            numpy.pi * pm.Nmesh.min() / pm.BoxSize.max() + dk / 2, dk)

        # measure the binned 1D multipoles in Fourier space
        poles = self._compute_multipoles()

        # set all the necessary results
        self.poles = BinnedStatistic(['k'], [self.edges],
                                     poles,
                                     fields_to_sum=['modes'],
                                     **self.attrs)
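
The binning above reduces to simple arithmetic: the bin width defaults to the fundamental frequency 2*pi/BoxSize, and the edges extend from kmin out to the minimum Nyquist frequency pi*Nmesh/BoxSize. A standalone sketch with assumed box parameters (the 1380 Mpc/h box and 512^3 mesh are hypothetical):

import numpy

BoxSize = numpy.array([1380., 1380., 1380.])
Nmesh = numpy.array([512, 512, 512])
kmin = 0.

dk = 2 * numpy.pi / BoxSize.min()              # fundamental frequency
knyq = numpy.pi * Nmesh.min() / BoxSize.max()  # minimum Nyquist frequency
edges = numpy.arange(kmin, knyq + dk / 2, dk)  # bins out to Nyquist
print(dk, knyq, len(edges) - 1)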
Example #6
def _load(kind, box=None):
    """
    Internal function to load the QPM results.
    """
    d = os.environ['THESIS_DIR']
    d = os.path.join(d, 'boss_dr12_mocks', 'Results', 'QPM', 'nbodykit',
                     'redshift', kind)

    # the pattern
    box = "*" if box is None else "%04d" % box
    if kind == 'power':
        files = glob(
            os.path.join(d,
                         f"pkmu_qpm_unscaled_{box}_0.6452_dk005_Nmu100.dat"))
        dims = ['k', 'mu']
        names = ['k', 'mu', 'power', 'modes']
    else:
        files = glob(
            os.path.join(d,
                         f"poles_qpm_unscaled_{box}_0.6452_dk005_Nmu100.dat"))
        dims = ['k']
        names = ['k', 'power_0', 'power_2', 'power_4', 'modes']

    toret = []
    for f in files:
        d = BinnedStatistic.from_plaintext(dims, f)
        if kind == 'poles':
            for i, name in enumerate(names):
                d.rename_variable('col_%d' % i, name)
        toret.append(d)

    return toret
Example #7
def test_reindex(comm):
    import warnings

    dataset = BinnedStatistic.from_json(
        os.path.join(data_dir, 'dataset_2d.json'))

    with pytest.raises(ValueError):
        new, spacing = dataset.reindex('k',
                                       0.005,
                                       force=True,
                                       return_spacing=True)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)

        weights = numpy.random.random(dataset.shape)
        dataset['weights'] = weights
        new, spacing = dataset.reindex('k',
                                       0.02,
                                       weights='weights',
                                       force=True,
                                       return_spacing=True)

        diff = numpy.diff(new.coords['k'])
        assert numpy.all(diff > numpy.diff(dataset.coords['k'])[0])

        with pytest.raises(ValueError):
            new = dataset.reindex('mu', 0.4, force=False)
        new = dataset.reindex('mu', 0.4, force=True)
Example #8
def test_copy(comm):

    dataset = BinnedStatistic.from_json(
        os.path.join(data_dir, 'dataset_2d.json'))
    copy = dataset.copy()
    for var in dataset:
        testing.assert_array_equal(dataset[var], copy[var])
Example #9
def test_average(comm):
    import warnings

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # unweighted
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)

        avg = dataset.average('mu')
        for var in dataset.variables:
            if var in dataset._fields_to_sum:
                x = numpy.nansum(dataset[var], axis=-1)
            else:
                x = numpy.nanmean(dataset[var], axis=-1)
            testing.assert_allclose(x, avg[var])

        # weighted
        weights = numpy.random.random(dataset.shape)
        dataset['weights'] = weights
        avg = dataset.average('mu', weights='weights')

        for var in dataset:
            if var in dataset._fields_to_sum:
                x = numpy.nansum(dataset[var], axis=-1)
            else:
                x = numpy.nansum(dataset[var]*dataset['weights'], axis=-1)
                x /= dataset['weights'].sum(axis=-1)
            testing.assert_allclose(x, avg[var])
Example #10
def test_to_json(comm):

    # load from JSON
    ds1 = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_1d.json'))

    # to JSON
    with tempfile.NamedTemporaryFile(delete=False) as ff:
        ds1.to_json(ff.name)
    ds2 = BinnedStatistic.from_json(ff.name)

    # same data?
    for name in ds1:
        testing.assert_almost_equal(ds1[name], ds2[name])

    # cleanup
    os.remove(ff.name)
Example #11
def NaturalEstimator(data_paircount):
    """
    Internal function to compute the correlation function using
    analytic randoms and the so-called "natural" correlation function
    estimator, :math:`DD/RR - 1`.
    """
    # data1 x data2
    D1D2 = data_paircount.pairs
    attrs = data_paircount.attrs

    # determine the sample sizes
    ND1, ND2 = attrs['N1'], attrs['N2']
    edges = D1D2.edges
    mode = attrs['mode']
    BoxSize = attrs['BoxSize']

    # analytic randoms - randoms calculation assuming uniform distribution
    _R1R2 = AnalyticUniformRandoms(mode, edges, BoxSize)(ND1, ND2)
    edges = [D1D2.edges[d] for d in D1D2.dims]
    R1R2 = BinnedStatistic(D1D2.dims, edges, _R1R2.view([('npairs', 'f8')]))

    # and compute the correlation function as DD/RR - 1
    CF = (D1D2['npairs'] * D1D2['weightavg']) / R1R2['npairs'] - 1.

    # create a BinnedStatistic holding the CF
    CF = _create_tpcf_result(D1D2, CF)

    return R1R2, CF
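
The analytic randoms term here replaces an explicit random catalog: in a periodic box, the expected number of uniform pairs in a radial shell is just N1*N2 times the shell volume over the box volume. A rough sketch of that idea (an illustration of the role AnalyticUniformRandoms plays above, not its exact implementation; the bin edges and counts are hypothetical):

import numpy

def analytic_rr_1d(edges, BoxSize, N1, N2):
    # expected uniform pair counts per shell: N1*N2 * V_shell / V_box
    V_box = numpy.prod(BoxSize)
    V_shell = 4. / 3. * numpy.pi * (edges[1:]**3 - edges[:-1]**3)
    return N1 * N2 * V_shell / V_box

edges = numpy.linspace(10., 50., 9)                   # hypothetical r bins
RR = analytic_rr_1d(edges, [1000., 1000., 1000.], N1=1e5, N2=1e5)
# the natural estimator then reads CF = DD / RR - 1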
Example #12
    def __setstate__(self, state):
        self.__dict__.update(state)
        edges = self.attrs['edges']

        # reconstruct the result based on mode
        kws = {'fields_to_sum': ['npairs']}
        if self.attrs['mode'] == '1d':
            dims, edges = ['r'], [edges]

        elif self.attrs['mode'] == '2d':
            muedges = numpy.linspace(0, 1., self.attrs['Nmu'] + 1)
            dims, edges = ['r', 'mu'], [edges, muedges]

        elif self.attrs['mode'] == 'projected':
            piedges = numpy.linspace(0, self.attrs['pimax'],
                                     self.attrs['pimax'] + 1)
            dims, edges = ['rp', 'pi'], [edges, piedges]

        elif self.attrs['mode'] == 'angular':
            dims, edges = ['theta'], [edges]

        else:
            valid = ['1d', '2d', 'angular', 'projected']
            args = (self.attrs['mode'], valid)
            raise ValueError("mode = '%s' should be one of %s" % args)

        # save the result as a BinnedStatistic
        self.pairs = BinnedStatistic(dims, edges, self.pairs, **kws)
Example #13
def test_sel(comm):

    dataset = BinnedStatistic.from_json(
        os.path.join(data_dir, 'dataset_2d.json'))

    # no exact match fails
    with pytest.raises(IndexError):
        sliced = dataset.sel(k=0.1)

    # this should be squeezed
    sliced = dataset.sel(k=0.1, method='nearest')
    assert len(sliced.dims) == 1

    # this is not squeezed
    sliced = dataset.sel(k=[0.1], method='nearest')
    assert sliced.shape[0] == 1

    # this returns an empty k axis with arbitrary edges.
    sliced = dataset.sel(k=[], method='nearest')
    assert sliced.shape[0] == 0

    # slice in a specific k-range
    sliced = dataset.sel(k=slice(0.02, 0.15), mu=[0.5], method='nearest')
    assert sliced.shape[1] == 1
    assert numpy.all((sliced['k'] >= 0.02) & (sliced['k'] <= 0.15))
Example #14
def test_rename_variable(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))
    test = numpy.zeros(dataset.shape)
    dataset['test'] = test

    dataset.rename_variable('test', 'renamed_test')
    assert 'renamed_test' in dataset
    assert 'test' not in dataset
Example #15
    def to_pkmu(self, mu_edges, max_ell):
        """
        Invert the measured multipoles :math:`P_\ell(k)` into power
        spectrum wedges, :math:`P(k,\mu)`.

        Parameters
        ----------
        mu_edges : array_like
            the edges of the :math:`\mu` bins
        max_ell : int
            the maximum multipole to use when computing the wedges;
            all even multipoles with :math:`\ell` less than or equal
            to this number are included

        Returns
        -------
        pkmu : BinnedStatistic
            a data set holding the :math:`P(k,\mu)` wedges
        """
        from scipy.special import legendre
        from scipy.integrate import quad

        def compute_coefficient(ell, mumin, mumax):
            """
            Compute how much each multipole contributes to a given wedges.
            This returns:

            .. math::
                \frac{1}{\mu_{max} - \mu_{max}} \int_{\mu_{min}}^{\mu^{max}} \mathcal{L}_\ell(\mu)
            """
            norm = 1.0 / (mumax - mumin)
            return norm * quad(lambda mu: legendre(ell)(mu), mumin, mumax)[0]

        # make sure we have all the poles measured
        ells = list(range(0, max_ell+1, 2))
        if any('power_%d' %ell not in self.poles for ell in ells):
            raise ValueError("measurements for ells=%s required if max_ell=%d" %(ells, max_ell))

        # new data array
        dtype = numpy.dtype([('power', 'c8'), ('k', 'f8'), ('mu', 'f8')])
        data = numpy.zeros((self.poles.shape[0], len(mu_edges)-1), dtype=dtype)

        # loop over each wedge
        bounds = list(zip(mu_edges[:-1], mu_edges[1:]))
        for imu, mulims in enumerate(bounds):

            # add the contribution from each Pell
            for ell in ells:
                coeff = compute_coefficient(ell, *mulims)
                data['power'][:,imu] += coeff * self.poles['power_%d' %ell]

            data['k'][:,imu] = self.poles['k']
            data['mu'][:,imu] = numpy.ones(len(data))*0.5*(mulims[1]+mulims[0])

        dims = ['k', 'mu']
        edges = [self.poles.edges['k'], mu_edges]
        return BinnedStatistic(dims=dims, edges=edges, data=data, coords=[self.poles.coords['k'], None], **self.attrs)
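
The coefficient above is just the average of the Legendre polynomial over a mu bin, which can be checked standalone (a small sanity-check sketch):

from scipy.special import legendre
from scipy.integrate import quad

def compute_coefficient(ell, mumin, mumax):
    # bin-averaged Legendre polynomial, as in to_pkmu above
    norm = 1.0 / (mumax - mumin)
    return norm * quad(lambda mu: legendre(ell)(mu), mumin, mumax)[0]

print(compute_coefficient(0, 0., 1.))  # 1.0: the monopole enters every wedge fully
print(compute_coefficient(2, 0., 1.))  # 0.0: L_2 averages to zero over [0, 1]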
Example #16
    def _make_datasets(self):

        if self.attrs['mode'] == '1d':
            self.power = BinnedStatistic(['k'], [self.edges],
                                         self.power,
                                         fields_to_sum=['modes'],
                                         **self.attrs)
        else:
            self.power = BinnedStatistic(['k', 'mu'],
                                         self.edges,
                                         self.power,
                                         fields_to_sum=['modes'],
                                         **self.attrs)
        if self.poles is not None:
            self.poles = BinnedStatistic(['k'], [self.power.edges['k']],
                                         self.poles,
                                         fields_to_sum=['modes'],
                                         **self.attrs)
Example #17
def _create_tpcf_result(D1D2, CF):
    """
    Create a BinnedStatistic holding the correlation function
    and average bin separation.
    """
    x = D1D2.dims[0]
    data = numpy.empty_like(CF, dtype=[('corr', 'f8'), (x, 'f8')])
    data['corr'] = CF[:]
    data[x] = D1D2[x]
    edges = [D1D2.edges[d] for d in D1D2.dims]
    return BinnedStatistic(D1D2.dims, edges, data)
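
The helper relies on a small NumPy idiom: numpy.empty_like with a new dtype keeps the input's shape but swaps in named fields. A standalone sketch with made-up values:

import numpy

CF = numpy.random.random(8)                # hypothetical correlation values
data = numpy.empty_like(CF, dtype=[('corr', 'f8'), ('r', 'f8')])
data['corr'] = CF[:]
data['r'] = numpy.linspace(5., 40., 8)     # assumed mean bin separations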
Example #18
def test_list_array_slice(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # get the first and last mu column
    sliced = dataset[:,[0, -1]]
    assert len(sliced.shape) == 2
    assert sliced.dims == ['k', 'mu']

    # make sure we grabbed the right data
    for var in dataset:
        testing.assert_array_equal(dataset[var][:,[0,-1]], sliced[var])
Example #19
def test_str(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # list all variable names
    s = str(dataset)

    # now just list total number of variables
    dataset['test1'] = numpy.ones(dataset.shape)
    dataset['test2'] = numpy.ones(dataset.shape)
    s = str(dataset)

    # this is the same as str
    r = repr(dataset)
Example #20
def test_array_slice(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # get the first mu column
    sliced = dataset[:,0]
    assert sliced.shape[0] == dataset.shape[0]
    assert len(sliced.shape) == 1
    assert sliced.dims == ['k']

    # get the first mu column but keep dimension
    sliced = dataset[:,[0]]
    assert sliced.shape[0] == dataset.shape[0]
    assert sliced.shape[1] == 1
    assert sliced.dims == ['k', 'mu']
Example #21
def test_variable_set(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))
    modes = numpy.ones(dataset.shape)

    # add new variable
    dataset['TEST'] = modes
    assert 'TEST' in dataset

    # override existing variable
    dataset['modes'] = modes
    assert numpy.all(dataset['modes'] == 1.0)

    # needs right shape
    with pytest.raises(ValueError):
        dataset['TEST'] = 10.
Example #22
def test_squeeze(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # need to specify which dimension to squeeze
    with pytest.raises(ValueError):
        squeezed = dataset.squeeze()
    with pytest.raises(ValueError):
        squeezed = dataset[[0],[0]].squeeze()

    sliced = dataset[:,[2]]
    with pytest.raises(ValueError):
        squeezed = sliced.squeeze('k')
    squeezed = sliced.squeeze('mu')

    assert len(squeezed.dims) == 1
    assert squeezed.shape[0] == sliced.shape[0]
Example #23
    def to_poles(self, poles):
        r"""
        Invert the measured wedges :math:`\xi(r,\mu)` into correlation
        multipoles, :math:`\xi_\ell(r)`.

        To select a mu_range, use

        .. code:: python

            poles = self.sel(mu=slice(*mu_range), method='nearest').to_poles(poles)

        Parameters
        ----------
        poles: array_like
            the list of multipoles to compute

        Returns
        -------
        xi_ell : BinnedStatistic
            a data set holding the :math:`\xi_\ell(r)` multipoles
        """
        from scipy.special import legendre
        from scipy.integrate import quad

        # new data array
        x = str(self.dims[0])
        dtype = numpy.dtype([(x, 'f8')] + [('corr_%d' % ell, 'f8')
                                           for ell in poles])
        data = numpy.zeros((self.shape[0]), dtype=dtype)
        dims = [x]
        edges = [self.edges[x]]

        # FIXME: use something fancier than the central point.
        mu_bins = numpy.diff(self.edges['mu'])
        mu_mid = (self.edges['mu'][1:] + self.edges['mu'][:-1]) / 2.

        for ell in poles:
            legendrePolynomial = (2. * ell + 1.) * legendre(ell)(mu_mid)
            data['corr_%d' %
                 ell] = numpy.sum(self['corr'] * legendrePolynomial * mu_bins,
                                  axis=-1) / numpy.sum(mu_bins)

        data[x] = numpy.mean(self[x], axis=-1)

        return BinnedStatistic(dims=dims, edges=edges, data=data, poles=poles)
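
The projection above is a midpoint rule: each multipole is the wedge measurement weighted by (2*ell+1) * L_ell(mu) at the bin centers, averaged over mu. Written standalone (xi here is a hypothetical array of wedges):

import numpy
from scipy.special import legendre

mu_edges = numpy.linspace(0., 1., 11)
xi = numpy.random.random((20, 10))          # hypothetical (Nr, Nmu) wedges

dmu = numpy.diff(mu_edges)
mu_mid = 0.5 * (mu_edges[1:] + mu_edges[:-1])

ell = 2
weights = (2. * ell + 1.) * legendre(ell)(mu_mid)
xi_ell = numpy.sum(xi * weights * dmu, axis=-1) / dmu.sum()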
Example #24
def test_take(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    sliced = dataset.take(k=[8])
    assert sliced.shape[0] == 1
    assert len(sliced.dims) == 2

    sliced = dataset.take(k=[])
    assert sliced.shape[0] == 0
    assert len(sliced.dims) == 2

    sliced = dataset.take(k=dataset.coords['k'] < 0.3)
    assert len(sliced.dims) == 2

    sliced = dataset.take(dataset['modes'] > 0)
    assert len(sliced.dims) == 2

    sliced = dataset.take(dataset['k'] < 0.3)
    assert len(sliced.dims) == 2
Example #25
def test_getitem(comm):

    dataset = BinnedStatistic.from_json(os.path.join(data_dir, 'dataset_2d.json'))

    # invalid key
    with pytest.raises(KeyError):
        bad = dataset['error']

    # slice columns
    sliced = dataset[['k', 'mu', 'power']]
    sliced = dataset[('k', 'mu', 'power')]

    # invalid slice
    with pytest.raises(KeyError):
        bad = dataset[['k', 'mu', 'error']]

    # too many dims in slice
    with pytest.raises(IndexError):
        bad = dataset[0,0,0]

    # cannot access single element of 2D power
    with pytest.raises(IndexError):
        bad = dataset[0,0]
Example #26
def _load(box=None, los=None):
    """
    Internal function to load the N-cubic results.
    """
    d = os.environ['THESIS_DIR']
    d = os.path.join(d, 'boss_dr12_mocks', 'Results', 'ChallengeMocks',
                     'nbodykit', 'power')

    if los is not None:
        assert los in "xyz"

    # the pattern
    box = "*" if box is None else "%d" % box
    los = "*" if los is None else los

    files = glob(
        os.path.join(
            d, f"pkmu_challenge_boxN{box}_unscaled_dk005_Nmu100_{los}los.dat"))

    toret = []
    for f in files:
        toret.append(BinnedStatistic.from_plaintext(['k', 'mu'], f))

    return toret
Example #27
    def __setstate__(self, state):
        self.__dict__.update(state)
        self.power = BinnedStatistic(['k'], [self.edges], self.power)
Example #28
File: fftpower.py Project: bccp/nbodykit
    def __setstate__(self, state):
        self.attrs = state['attrs']
        self.power = BinnedStatistic.from_state(state['power'])
        if state['poles'] is not None:
            self.poles = BinnedStatistic.from_state(state['poles'])
Example #29
    def _run(self, pos, w, pos_sec, w_sec, boxsize=None, bunchsize=10000):
        """
        Internal function to run the 3PCF algorithm on the input data and
        weights.

        The input data/weights have already been domain-decomposed, and
        the loads should be balanced on all ranks.
        """
        # maximum radius
        rmax = numpy.max(self.attrs['edges'])

        # the array to hold output values
        nbins  = len(self.attrs['edges'])-1
        Nell   = len(self.attrs['poles'])
        zeta = numpy.zeros((Nell,nbins,nbins), dtype='f8')
        alms = {}
        walms = {}

        # compute the Ylm expressions we need
        if self.comm.rank == 0:
            self.logger.info("computing Ylm expressions...")
        Ylm_cache = YlmCache(self.attrs['poles'], self.comm)
        if self.comm.rank == 0:
            self.logger.info("...done")

        # make the KD-tree holding the secondaries
        tree_sec = kdcount.KDTree(pos_sec, boxsize=boxsize).root

        def callback(r, i, j, iprim=None):

            # remove self pairs
            valid = r > 0.
            r = r[valid]; i = i[valid]

            # normalized, re-centered position array (periodic)
            dpos = (pos_sec[i] - pos[iprim])

            # enforce periodicity in dpos
            if boxsize is not None:
                for axis, col in enumerate(dpos.T):
                    col[col > boxsize[axis]*0.5] -= boxsize[axis]
                    col[col <= -boxsize[axis]*0.5] += boxsize[axis]
            recen_pos = dpos / r[:,numpy.newaxis]

            # find the mapping of r to rbins
            dig = numpy.searchsorted(self.attrs['edges'], r, side='left')

            # evaluate all Ylms
            Ylms = Ylm_cache(recen_pos[:,0]+1j*recen_pos[:,1], recen_pos[:,2])

            # sqrt of primary weight
            w0 = w[iprim]

            # loop over each (l,m) pair
            for (l,m) in Ylms:

                # the Ylm evaluated at galaxy positions
                weights = Ylms[(l,m)] * w_sec[i]

                # sum over for each radial bin
                alm = alms.setdefault((l, m), numpy.zeros(nbins, dtype='c16'))
                walm = walms.setdefault((l, m), numpy.zeros(nbins, dtype='c16'))

                r1 = numpy.bincount(dig, weights=weights.real, minlength=nbins+2)[1:-1]
                alm[...] += r1
                walm[...] += w0 * r1
                if m != 0:
                    i1 = numpy.bincount(dig, weights=weights.imag, minlength=nbins+2)[1:-1]
                    alm[...] += 1j*i1
                    walm[...] += w0*1j*i1

        # determine rank with largest load
        loads = self.comm.allgather(len(pos))
        largest_load = numpy.argmax(loads)
        chunk_size = max(loads) // 10

        # compute multipoles for each primary (s vector in the paper)
        for iprim in range(len(pos)):
            # alms must be clean for each primary particle; (s) in eq 15 and 8 of arXiv:1506.02040v2
            alms.clear()
            walms.clear()
            tree_prim = kdcount.KDTree(numpy.atleast_2d(pos[iprim]), boxsize=boxsize).root
            tree_sec.enum(tree_prim, rmax, process=callback, iprim=iprim, bunch=bunchsize)

            if self.comm.rank == largest_load and iprim % chunk_size == 0:
                self.logger.info("%d%% done" % (10*iprim//chunk_size))

            # combine alms into zeta(s);
            # this cannot be done in the callback because
            # it is a nonlinear function (outer product) of alm.
            for (l, m) in alms:
                alm = alms[(l, m)]
                walm = walms[(l, m)]

                # compute alm * conjugate(alm)
                alm_w_alm = numpy.outer(walm, alm.conj())
                if m != 0: alm_w_alm += alm_w_alm.T # add in the -m contribution for m != 0
                zeta[Ylm_cache.ell_to_iell[l], ...] += alm_w_alm.real

        # sum across all ranks
        zeta = self.comm.allreduce(zeta)

        # normalize according to Eq. 15 of Slepian et al. 2015
        # differs by factor of (4 pi)^2 / (2l+1) from the C++ code
        zeta /= (4*numpy.pi)

        # make a BinnedStatistic
        dtype = numpy.dtype([('corr_%d' % ell, zeta.dtype) for ell in self.attrs['poles']])
        data = numpy.empty(zeta.shape[-2:], dtype=dtype)
        for i, ell in enumerate(self.attrs['poles']):
            data['corr_%d' % ell] = zeta[i]

        # save the result
        edges = self.attrs['edges']
        poles = BinnedStatistic(['r1', 'r2'], [edges, edges], data)
        return poles
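
The radial binning inside the callback uses a compact NumPy idiom worth seeing in isolation: searchsorted maps each separation onto a bin index, bincount accumulates the weights per bin, and the [1:-1] slice drops the underflow and overflow bins. A standalone sketch with synthetic separations:

import numpy

edges = numpy.linspace(0., 10., 6)          # hypothetical radial bin edges
r = numpy.random.uniform(0., 12., 1000)     # separations, some out of range
weights = numpy.ones_like(r)

nbins = len(edges) - 1
dig = numpy.searchsorted(edges, r, side='left')
per_bin = numpy.bincount(dig, weights=weights, minlength=nbins + 2)[1:-1]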
Example #30
    def run(self):
        """
        Run the algorithm. This attaches the following attributes to the class:

        - :attr:`edges`
        - :attr:`power`

        Attributes
        ----------
        edges : array_like
            the edges of the wavenumber bins
        power : :class:`~nbodykit.binned_statistic.BinnedStatistic`
            a BinnedStatistic object that holds the projected power.
            It stores the following variables:

            - k :
                the mean value for each ``k`` bin
            - power :
                complex array holding the real and imaginary components of the
                projected power
            - modes :
                the number of Fourier modes averaged together in each bin
        """
        c1 = self.first.compute(Nmesh=self.attrs['Nmesh'], mode='complex')
        r1 = c1.preview(self.attrs['Nmesh'], axes=self.attrs['axes'])
        # dividing by Nmesh.prod() does two jobs: it supplies the 1/N
        # normalization that rfftn omits (and r2c includes) along the kept
        # axes, and it turns the sum over the projected axes into a mean
        c1 = numpy.fft.rfftn(r1) / self.attrs['Nmesh'].prod()

        # compute the auto power of single supplied field
        if self.first is self.second:
            c2 = c1
        else:
            c2 = self.second.compute(Nmesh=self.attrs['Nmesh'], mode='complex')
            r2 = c2.preview(self.attrs['Nmesh'], axes=self.attrs['axes'])
            c2 = numpy.fft.rfftn(r2) / self.attrs['Nmesh'].prod() # average along projected axes

        pk = c1 * c2.conj()
        # clear the zero mode
        pk.flat[0] = 0

        shape = numpy.array([self.attrs['Nmesh'][i] for i in self.attrs['axes']], dtype='int')
        boxsize = numpy.array([self.attrs['BoxSize'][i] for i in self.attrs['axes']])
        I = numpy.eye(len(shape), dtype='int') * -2 + 1

        k = [numpy.fft.fftfreq(N, 1. / (N * 2 * numpy.pi / L))[:pkshape].reshape(kshape) for N, L, kshape, pkshape in zip(shape, boxsize, I, pk.shape)]

        kmag = sum(ki ** 2 for ki in k) ** 0.5
        W = numpy.empty(pk.shape, dtype='f4')
        W[...] = 2.0
        W[..., 0] = 1.0
        W[..., -1] = 1.0

        dk = self.attrs['dk']
        kmin = self.attrs['kmin']
        axes = list(self.attrs['axes'])
        kedges = numpy.arange(kmin, numpy.pi * self.attrs['Nmesh'][axes].min() / self.attrs['BoxSize'][axes].max() + dk/2, dk)

        xsum = numpy.zeros(len(kedges) + 1)
        Psum = numpy.zeros(len(kedges) + 1, dtype='complex128')
        Nsum = numpy.zeros(len(kedges) + 1)

        dig = numpy.digitize(kmag.flat, kedges)
        xsum.flat += numpy.bincount(dig, weights=(W * kmag).flat, minlength=xsum.size)
        Psum.real.flat += numpy.bincount(dig, weights=(W * pk.real).flat, minlength=xsum.size)
        Psum.imag.flat += numpy.bincount(dig, weights=(W * pk.imag).flat, minlength=xsum.size)
        Nsum.flat += numpy.bincount(dig, weights=W.flat, minlength=xsum.size)

        self.power = numpy.empty(len(kedges) - 1,
                dtype=[('k', 'f8'), ('power', 'c16'), ('modes', 'f8')])

        with numpy.errstate(invalid='ignore', divide='ignore'):
            self.power['k'] = (xsum / Nsum)[1:-1]
            self.power['power'] = (Psum / Nsum)[1:-1] * boxsize.prod() # dimension is 'volume'
            self.power['modes'] = Nsum[1:-1]

        self.edges = kedges

        self.power = BinnedStatistic(['k'], [self.edges], self.power)
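
Two idioms in run() deserve a standalone look: the weight array W compensates for rfftn storing only the non-negative half of the last axis (interior modes have a conjugate partner and count twice, the first and last planes once), and digitize plus bincount then performs the spherical shell average. A sketch with toy inputs:

import numpy

pk = numpy.random.random((8, 5))            # hypothetical half-spectrum power
kmag = numpy.random.uniform(0.01, 0.99, pk.shape)

W = numpy.empty(pk.shape, dtype='f4')
W[...] = 2.0                                # interior modes count twice
W[..., 0] = 1.0                             # first plane counted once
W[..., -1] = 1.0                            # Nyquist plane counted once

kedges = numpy.linspace(0., 1., 6)
dig = numpy.digitize(kmag.flat, kedges)
Psum = numpy.bincount(dig, weights=(W * pk).flat, minlength=len(kedges) + 1)
Nsum = numpy.bincount(dig, weights=W.flat, minlength=len(kedges) + 1)
with numpy.errstate(invalid='ignore', divide='ignore'):
    Pk = (Psum / Nsum)[1:-1]                # mean power per k bin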