Example No. 1
    def as_strided(x, shape=None, strides=None):
        """ Make an ndarray from the given array with the given shape and strides.
        """
        # work around Numpy bug 1873 (reported by Irwin Zaid)
        # Since this is stolen from numpy, this implementation has the same bug.
        # http://projects.scipy.org/numpy/ticket/1873
        # == https://github.com/numpy/numpy/issues/2466

        # Do not recreate the array if nothing needs to be changed.
        # This fixes a lot of errors on pypy, since the DummyArray hack does
        # not currently (2014/May/17) work on pypy.

        if ((shape is None or x.shape == shape) and
            (strides is None or x.strides == strides)):
            return x
        if not x.dtype.isbuiltin:
            if shape is None:
                shape = x.shape
            if strides is not None:
                strides = tuple(strides)

            from pytools import product
            if strides is not None and shape is not None \
                    and product(shape) == product(x.shape) \
                    and x.flags.forc:
                # Workaround: If we're being asked to do what amounts to a
                # contiguous reshape, at least do that.

                if strides == f_contiguous_strides(x.dtype.itemsize, shape):
                    # **dict is a workaround for Python 2.5 syntax.
                    result = x.reshape(-1).reshape(*shape, **dict(order="F"))
                    assert result.strides == strides
                    return result
                elif strides == c_contiguous_strides(x.dtype.itemsize, shape):
                    # **dict is a workaround for Python 2.5 syntax.
                    result = x.reshape(-1).reshape(*shape, **dict(order="C"))
                    assert result.strides == strides
                    return result

            raise NotImplementedError(
                    "as_strided won't work on non-builtin arrays for now. "
                    "See https://github.com/numpy/numpy/issues/2466")

        interface = dict(x.__array_interface__)
        if shape is not None:
            interface['shape'] = tuple(shape)
        if strides is not None:
            interface['strides'] = tuple(strides)
        return np.asarray(_DummyArray(interface, base=x))
Example No. 2
def grad_monomial(order, rst):
    """Evaluate the derivative of the monomial of order *order* at the points *rst*.

    :arg order: A tuple *(i, j,...)* representing the order of the polynomial.
    :arg rst: ``rst[0], rst[1]`` are arrays of :math:`(r,s,...)` coordinates.
        (See :ref:`tri-coords`)
    :return: a tuple of vectors *(dphi_dr, dphi_ds, dphi_dt)*, each of the same
        length as the *rst* arrays.

    .. versionadded:: 2016.1
    """
    dim = len(order)
    assert dim == rst.shape[0]

    def diff_monomial(r, o):
        if o == 0:
            return 0 * r
        elif o == 1:
            return 1 + 0 * r
        else:
            return o * r**(o - 1)

    from pytools import product
    return tuple(
        product(
            (diff_monomial(rst[i], order[i]) if j == i else rst[i]**order[i])
            for i in range(dim)) for j in range(dim))
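A minimal usage sketch for the function above (assuming `grad_monomial` as defined here is in scope, e.g. pasted into the same module); the order tuple and sample points are illustrative:

import numpy as np

rst = np.array([
    [0.1, 0.5, 0.9],   # r coordinates
    [0.2, 0.4, 0.8],   # s coordinates
])

# For order (2, 3): d/dr (r**2 s**3) = 2 r s**3, d/ds (r**2 s**3) = 3 r**2 s**2
dphi_dr, dphi_ds = grad_monomial((2, 3), rst)
assert np.allclose(dphi_dr, 2 * rst[0] * rst[1]**3)
assert np.allclose(dphi_ds, 3 * rst[0]**2 * rst[1]**2)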
Example No. 3
def parametrization_derivative(actx: ArrayContext,
                               dcoll: DiscretizationCollection,
                               dd) -> MultiVector:
    r"""Computes the product of forward metric derivatives spanning the
    tangent space with topological dimension *dim*.

    :arg dd: a :class:`~grudge.dof_desc.DOFDesc`, or a value convertible to one.
        Defaults to the base volume discretization.
    :returns: a :class:`pymbolic.geometric_algebra.MultiVector` containing
        the product of metric derivatives.
    """
    if dd is None:
        dd = DD_VOLUME

    dim = dcoll.discr_from_dd(dd).dim
    if dim == 0:
        from pymbolic.geometric_algebra import get_euclidean_space

        return MultiVector(_signed_face_ones(actx, dcoll, dd),
                           space=get_euclidean_space(dcoll.ambient_dim))

    from pytools import product

    return product(
        forward_metric_derivative_mv(actx, dcoll, rst_axis, dd)
        for rst_axis in range(dim))
Example No. 4
    def get_function_declaration(self, codegen_state, codegen_result,
            schedule_index):
        fdecl = super(CUDACASTBuilder, self).get_function_declaration(
                codegen_state, codegen_result, schedule_index)

        from cgen.cuda import CudaGlobal, CudaLaunchBounds
        fdecl = CudaGlobal(fdecl)

        if self.target.extern_c:
            from cgen import Extern
            fdecl = Extern("C", fdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        _, local_grid_size = \
                codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
                        get_insn_ids_for_block_at(
                            codegen_state.kernel.schedule, schedule_index))

        from loopy.symbolic import get_dependencies
        if not get_dependencies(local_grid_size):
            # Sizes can't have parameter dependencies if they are
            # to be used in static thread block size.
            from pytools import product
            nthreads = product(local_grid_size)

            fdecl = CudaLaunchBounds(nthreads, fdecl)

        return fdecl
Example No. 5
    def __init__(self, center, extent=1, npoints=1000):
        center = np.asarray(center)
        self.dimensions, = dim, = center.shape
        self.a = a = center-extent*0.5
        self.b = b = center+extent*0.5

        if not isinstance(npoints, tuple):
            npoints = dim*(npoints,)
        else:
            if len(npoints) != dim:
                raise ValueError("length of npoints must match dimension")

        for i in range(dim):
            if npoints[i] == 1:
                a[i] = center[i]

        mgrid_index = tuple(
                slice(a[i], b[i], 1j*npoints[i])
                for i in range(dim))

        mgrid = np.mgrid[mgrid_index]

        # (axis, point x idx, point y idx, ...)
        self.nd_points = mgrid

        self.points = self.nd_points.reshape(dim, -1).copy()

        from pytools import product
        self.npoints = product(npoints)
Example No. 6
    def nbytes(self):
        shape = self.shape
        if self.storage_shape is not None:
            shape = self.storage_shape

        from pytools import product
        return product(si for si in shape)*self.dtype.itemsize
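All of these snippets lean on the same behavior of pytools.product: like math.prod, it multiplies the items of an iterable, and the empty product is 1. A quick standalone check (the shape values are illustrative):

from pytools import product

shape = (3, 4, 5)
itemsize = 8                                 # e.g. float64
assert product(shape) == 60
assert product(si for si in shape) == 60     # generators work too
assert product(()) == 1                      # empty product
nbytes = product(shape) * itemsize           # the pattern used in nbytes() above
assert nbytes == 480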
Example No. 7
    def __init__(self, center, extent=1, npoints=1000):
        center = np.asarray(center)
        self.dimensions, = dim, = center.shape
        self.a = a = center - extent * 0.5
        self.b = b = center + extent * 0.5

        from numbers import Number
        if isinstance(npoints, Number):
            npoints = dim * (npoints, )
        else:
            if len(npoints) != dim:
                raise ValueError("length of npoints must match dimension")

        for i in range(dim):
            if npoints[i] == 1:
                a[i] = center[i]

        mgrid_index = tuple(
            slice(a[i], b[i], 1j * npoints[i]) for i in range(dim))

        mgrid = np.mgrid[mgrid_index]

        # (axis, point x idx, point y idx, ...)
        self.nd_points = mgrid

        self.points = self.nd_points.reshape(dim, -1).copy()

        from pytools import product
        self.npoints = product(npoints)
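A standalone sketch of the bookkeeping used above: an np.mgrid sample of a box with npoints entries per axis yields product(npoints) points once flattened (the names and values below are illustrative, not tied to the class above):

import numpy as np
from pytools import product

npoints = (4, 5)
a = np.array([0.0, 0.0])
b = np.array([1.0, 2.0])

mgrid = np.mgrid[tuple(
    slice(a[i], b[i], 1j * npoints[i]) for i in range(len(npoints)))]
points = mgrid.reshape(len(npoints), -1)

assert mgrid.shape == (2, 4, 5)
assert points.shape[1] == product(npoints)   # 20 points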
Example No. 8
def grad_monomial(order, rst):
    """Evaluate the derivative of the monomial of order *order* at the points *rst*.

    :arg order: A tuple *(i, j,...)* representing the order of the polynomial.
    :arg rst: ``rst[0], rst[1]`` are arrays of :math:`(r,s,...)` coordinates.
        (See :ref:`tri-coords`)
    :return: a tuple of vectors *(dphi_dr, dphi_ds, dphi_dt)*, each of the same
        length as the *rst* arrays.

    .. versionadded:: 2016.1
    """
    dim = len(order)
    assert dim == rst.shape[0]

    def diff_monomial(r, o):
        if o == 0:
            return 0*r
        elif o == 1:
            return 1+0*r
        else:
            return o * r**(o-1)

    from pytools import product
    return tuple(
            product(
                (
                    diff_monomial(rst[i], order[i])
                    if j == i else
                    rst[i] ** order[i])
                for i in range(dim)
                )
            for j in range(dim))
Example No. 9
    def __init__(self, shape, dtype=numpy.float32, stream=None,
                 allocator=drv.mem_alloc, cuda_device=0):
        try:
            drv.init()
            ctx = drv.Device(0).make_context()
        except RuntimeError:
            # The device is already initialized; ignore this. Ugly, but works for now.
            pass
        
        #which device are we working on
        self.cuda_device = cuda_device
        
        #internal shape
        self.shape = shape
        
        #internal type
        self.dtype = numpy.dtype(dtype)

        from pytools import product
        
        #internal size
        self.size = product(shape)

        self.allocator = allocator
        if self.size:
            self.gpudata = self.allocator(self.size * self.dtype.itemsize)
        else:
            self.gpudata = None
        self.stream = stream

        self._update_kernel_kwargs()
Example No. 10
    def get_function_declaration(self, codegen_state, codegen_result,
            schedule_index):
        fdecl = super(CUDACASTBuilder, self).get_function_declaration(
                codegen_state, codegen_result, schedule_index)

        from cgen.cuda import CudaGlobal, CudaLaunchBounds
        fdecl = CudaGlobal(fdecl)

        if self.target.extern_c:
            from cgen import Extern
            fdecl = Extern("C", fdecl)

        from loopy.schedule import get_insn_ids_for_block_at
        _, local_grid_size = \
                codegen_state.kernel.get_grid_sizes_for_insn_ids_as_exprs(
                        get_insn_ids_for_block_at(
                            codegen_state.kernel.schedule, schedule_index))

        from loopy.symbolic import get_dependencies
        if not get_dependencies(local_grid_size):
            # Sizes can't have parameter dependencies if they are
            # to be used in static thread block size.
            from pytools import product
            nthreads = product(local_grid_size)

            fdecl = CudaLaunchBounds(nthreads, fdecl)

        return fdecl
Example No. 11
    def nbytes(self):
        shape = self.shape
        if self.storage_shape is not None:
            shape = self.storage_shape

        from pytools import product
        return product(si for si in shape) * self.dtype.itemsize
Example No. 12
def as_strided(x, shape=None, strides=None):
    """ Make an ndarray from the given array with the given shape and strides.
    """
    # work around Numpy bug 1873 (reported by Irwin Zaid)
    # Since this is stolen from numpy, this implementation has the same bug.
    # http://projects.scipy.org/numpy/ticket/1873
    # == https://github.com/numpy/numpy/issues/2466

    if not x.dtype.isbuiltin:
        if (shape is None or x.shape == shape) and \
                (strides is None or x.strides == strides):
            return x

        if shape is None:
            shape = x.shape
        if strides is not None:
            strides = tuple(strides)

        from pytools import product
        if strides is not None and shape is not None \
                and product(shape) == product(x.shape) \
                and x.flags.forc:
            # Workaround: If we're being asked to do what amounts to a
            # contiguous reshape, at least do that.

            if strides == f_contiguous_strides(x.dtype.itemsize, shape):
                # **dict is a workaround for Python 2.5 syntax.
                result = x.reshape(-1).reshape(*shape, **dict(order="F"))
                assert result.strides == strides
                return result
            elif strides == c_contiguous_strides(x.dtype.itemsize, shape):
                # **dict is a workaround for Python 2.5 syntax.
                result = x.reshape(-1).reshape(*shape, **dict(order="C"))
                assert result.strides == strides
                return result

        raise NotImplementedError(
                "as_strided won't work on non-builtin arrays for now. "
                "See https://github.com/numpy/numpy/issues/2466")

    interface = dict(x.__array_interface__)
    if shape is not None:
        interface['shape'] = tuple(shape)
    if strides is not None:
        interface['strides'] = tuple(strides)
    return np.asarray(_DummyArray(interface, base=x))
Example No. 13
def parametrization_derivative(ambient_dim, dim, where=None):
    """Return a :class:`pymbolic.geometric_algebra.MultiVector` representing
    the derivative of the reference-to-global parametrization.
    """

    par_grad = parametrization_derivative_matrix(ambient_dim, dim, where)

    from pytools import product
    return product(MultiVector(vec) for vec in par_grad.T)
Example No. 14
    def __init__(self, shape, dtype, stream=None):
        self.shape = shape
        self.dtype = numpy.dtype(dtype)
        from pytools import product
        self.size = product(shape)
        if self.size:
            self.gpudata = drv.mem_alloc(self.size * self.dtype.itemsize)
        else:
            self.gpudata = None
        self.stream = stream
Example No. 15
def as_strided(x, shape=None, strides=None):
    """ Make an ndarray from the given array with the given shape and strides.
    """
    # work around Numpy bug 1873 (reported by Irwin Zaid)
    # Since this is stolen from numpy, this implementation has the same bug.
    # http://projects.scipy.org/numpy/ticket/1873

    if not x.dtype.isbuiltin:
        if (shape is None or x.shape == shape) and \
                (strides is None or x.strides == strides):
            return x

        if shape is None:
            shape = x.shape
        if strides is not None:
            strides = tuple(strides)

        from pytools import product
        if strides is not None and shape is not None \
                and product(shape) == product(x.shape) \
                and x.flags.forc:
            # Workaround: If we're being asked to do what amounts to a
            # contiguous reshape, at least do that.

            if strides == f_contiguous_strides(x.dtype.itemsize, shape):
                result = x.reshape(-1).reshape(*shape, order="F")
                assert result.strides == strides
                return result
            elif strides == c_contiguous_strides(x.dtype.itemsize, shape):
                result = x.reshape(-1).reshape(*shape, order="C")
                assert result.strides == strides
                return result

        raise NotImplementedError(
                "as_strided won't work on non-builtin arrays for now. "
                "See http://projects.scipy.org/numpy/ticket/1873")

    interface = dict(x.__array_interface__)
    if shape is not None:
        interface['shape'] = tuple(shape)
    if strides is not None:
        interface['strides'] = tuple(strides)
    return np.asarray(_DummyArray(interface, base=x))
Example No. 16
def check_sizes(kernel, device):
    import loopy as lp

    from loopy.diagnostic import LoopyAdvisory, LoopyError

    if device is None:
        from loopy.diagnostic import warn
        warn(kernel, "no_device_in_pre_codegen_checks",
                "No device parameter was passed to the PyOpenCLTarget. "
                "Perhaps you want to pass a device to benefit from "
                "additional checking.", LoopyAdvisory)
        return

    parameters = {}
    for arg in kernel.args:
        if isinstance(arg, lp.ValueArg) and arg.approximately is not None:
            parameters[arg.name] = arg.approximately

    glens, llens = kernel.get_grid_size_upper_bounds_as_exprs()

    if (max(len(glens), len(llens))
            > device.max_work_item_dimensions):
        raise LoopyError("too many work item dimensions")

    from pymbolic import evaluate
    from pymbolic.mapper.evaluator import UnknownVariableError
    try:
        glens = evaluate(glens, parameters)
        llens = evaluate(llens, parameters)
    except UnknownVariableError as name:
        from warnings import warn
        warn("could not check axis bounds because no value "
                "for variable '%s' was passed to check_kernels()"
                % name, LoopyAdvisory)
    else:
        for i in range(len(llens)):
            if llens[i] > device.max_work_item_sizes[i]:
                raise LoopyError("group axis %d too big" % i)

        from pytools import product
        if product(llens) > device.max_work_group_size:
            raise LoopyError("work group too big")

    from pyopencl.characterize import usable_local_mem_size
    if kernel.local_mem_use() > usable_local_mem_size(device):
        raise LoopyError("using too much local memory")

    from loopy.kernel.data import ConstantArg
    const_arg_count = sum(
            1 for arg in kernel.args
            if isinstance(arg, ConstantArg))

    if const_arg_count > device.max_constant_args:
        raise LoopyError("too many constant arguments")
Example No. 17
def check_sizes(kernel, device):
    import loopy as lp

    from loopy.diagnostic import LoopyAdvisory, LoopyError

    if device is None:
        from loopy.diagnostic import warn
        warn(kernel, "no_device_in_pre_codegen_checks",
                "No device parameter was passed to the PyOpenCLTarget. "
                "Perhaps you want to pass a device to benefit from "
                "additional checking.", LoopyAdvisory)
        return

    parameters = {}
    for arg in kernel.args:
        if isinstance(arg, lp.ValueArg) and arg.approximately is not None:
            parameters[arg.name] = arg.approximately

    glens, llens = kernel.get_grid_sizes_as_exprs()

    if (max(len(glens), len(llens))
            > device.max_work_item_dimensions):
        raise LoopyError("too many work item dimensions")

    from pymbolic import evaluate
    from pymbolic.mapper.evaluator import UnknownVariableError
    try:
        glens = evaluate(glens, parameters)
        llens = evaluate(llens, parameters)
    except UnknownVariableError as name:
        from warnings import warn
        warn("could not check axis bounds because no value "
                "for variable '%s' was passed to check_kernels()"
                % name, LoopyAdvisory)
    else:
        for i in range(len(llens)):
            if llens[i] > device.max_work_item_sizes[i]:
                raise LoopyError("group axis %d too big" % i)

        from pytools import product
        if product(llens) > device.max_work_group_size:
            raise LoopyError("work group too big")

    from pyopencl.characterize import usable_local_mem_size
    if kernel.local_mem_use() > usable_local_mem_size(device):
        raise LoopyError("using too much local memory")

    from loopy.kernel.data import ConstantArg
    const_arg_count = sum(
            1 for arg in kernel.args
            if isinstance(arg, ConstantArg))

    if const_arg_count > device.max_constant_args:
        raise LoopyError("too many constant arguments")
Example No. 18
def parametrization_derivative(ambient_dim, dim=None, dd=None):
    if dim is None:
        dim = ambient_dim

    if dim == 0:
        return MultiVector(np.array([_SignedFaceOnes(dd)]))

    from pytools import product
    return product(
        forward_metric_derivative_mv(ambient_dim, rst_axis, dd)
        for rst_axis in range(dim))
Example No. 19
def monomial(order, rst):
    """Evaluate the monomial of order *order* at the points *rst*.

    :arg order: A tuple *(i, j,...)* representing the order of the polynomial.
    :arg rst: ``rst[0], rst[1]`` are arrays of :math:`(r,s,...)` coordinates.
        (See :ref:`tri-coords`)
    """
    dim = len(order)
    assert dim == rst.shape[0]

    from pytools import product
    return product(rst[i] ** order[i] for i in range(dim))
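A minimal usage sketch (assuming `monomial` as defined above is in scope); it checks the order-(2, 3) monomial against the explicit expression:

import numpy as np

rst = np.array([
    [0.1, 0.5, 0.9],   # r coordinates
    [0.2, 0.4, 0.8],   # s coordinates
])

phi = monomial((2, 3), rst)
assert np.allclose(phi, rst[0]**2 * rst[1]**3)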
Example No. 20
def monomial(order, rst):
    """Evaluate the monomial of order *order* at the points *rst*.

    :arg order: A tuple *(i, j,...)* representing the order of the polynomial.
    :arg rst: ``rst[0], rst[1]`` are arrays of :math:`(r,s,...)` coordinates.
        (See :ref:`tri-coords`)
    """
    dim = len(order)
    assert dim == rst.shape[0]

    from pytools import product
    return product(rst[i]**order[i] for i in range(dim))
Example No. 21
def parametrization_derivative(ambient_dim, dim=None, dd=None):
    if dim is None:
        dim = ambient_dim

    if dim == 0:
        from pymbolic.geometric_algebra import get_euclidean_space
        return MultiVector(_SignedFaceOnes(dd),
                space=get_euclidean_space(ambient_dim))

    from pytools import product
    return product(
        forward_metric_derivative_mv(ambient_dim, rst_axis, dd)
        for rst_axis in range(dim))
Example No. 22
    def diagonal(self, *, get_data=True):
        no_trace_tensors = [basis.computational_basis_vectors
                            for basis in self.bases]

        trace_argument = []
        n_qubits = self.n_qubits
        for i, ntt in enumerate(no_trace_tensors):
            trace_argument.append(ntt)
            trace_argument.append([i + n_qubits, i])

        indices = list(range(n_qubits))
        out_indices = list(range(n_qubits, 2 * n_qubits))
        complex_dm_dimension = pytools.product(self.dim_hilbert)
        return np.einsum(self._data, indices, *trace_argument, out_indices,
                         optimize=True).real.reshape(complex_dm_dimension)
Example No. 23
    def __call__(self, op_class, field):
        discr = self.discr
        given = self.plan.given

        d = discr.dimensions
        elgroup, = discr.element_groups

        func, field_texref = self.get_kernel(op_class, elgroup)

        assert field.dtype == given.float_type

        field.bind_to_texref_ext(field_texref, allow_double_hack=True)

        rst_diff = [discr.volume_empty() for axis in range(d)]
        rst_diff_gpudata = [subarray.gpudata for subarray in rst_diff]

        gpu_diffmats = self.gpu_diffmats(op_class, elgroup)

        if discr.instrumented:
            discr.diff_op_timer.add_timer_callable(func.prepared_timed_call(
                    self.grid, gpu_diffmats.device_memory,
                    *rst_diff_gpudata))

            from pytools import product
            discr.gmem_bytes_diff.add(
                    given.float_size()
                    * (
                        # matrix fetch
                        gpu_diffmats.block_floats * product(self.grid)
                        # field fetch
                        + given.dofs_per_el()
                        * given.dofs_per_el() * given.microblock.elements
                        * self.grid[1] * self.plan.parallelism.total()
                        # field store
                        + len(discr.nodes)
                        ))
        else:
            func.prepared_call(self.grid, gpu_diffmats.device_memory,
                    *rst_diff_gpudata)

        if False:
            copied_debugbuf = debugbuf.get()
            print "DEBUG"
            #print numpy.reshape(copied_debugbuf, (len(copied_debugbuf)//16, 16))
            print copied_debugbuf[:100].reshape((10,10))
            raw_input()

        return rst_diff
Example No. 24
    def get_rho_distrib(self):
        compdata = [(i, numpy.array(l), numpy.array(h))
                for i, (l, h) in enumerate(self.zipped)]

        from pytools import product
        normalization = 1/product(h-l for l, h in self.zipped)

        lower = numpy.array(self.lower)
        upper = numpy.array(self.upper)

        def f(x, el):
            if (x < lower).any() or (upper < x).any():
                return 0
            return normalization

        return f
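A standalone check of the normalization used above: for a box with per-axis bounds (l, h), the constant density 1/product(h - l) integrates to 1 over the box (lower/upper below are illustrative):

import numpy as np
from pytools import product

lower = np.array([0.0, -1.0])
upper = np.array([2.0, 1.0])

normalization = 1 / product(h - l for l, h in zip(lower, upper))
volume = product(upper - lower)            # 2 * 2 = 4
assert abs(normalization * volume - 1.0) < 1e-15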
Example No. 25
    def wrap_function_declaration(self, kernel, fdecl):
        from cgen.cuda import CudaGlobal, CudaLaunchBounds

        fdecl = CudaGlobal(fdecl)

        if self.extern_c:
            from cgen import Extern

            fdecl = Extern("C", fdecl)

        _, local_grid_size = kernel.get_grid_sizes_as_exprs()

        from pytools import product

        nthreads = product(local_grid_size)

        return CudaLaunchBounds(nthreads, fdecl)
Example No. 26
    def __call__(self, op_class, field):
        discr = self.discr
        given = self.plan.given

        d = discr.dimensions
        elgroup, = discr.element_groups

        func, field_texref = self.get_kernel(op_class, elgroup)

        assert field.dtype == given.float_type

        field.bind_to_texref_ext(field_texref, allow_double_hack=True)

        rst_diff = [discr.volume_empty() for axis in range(d)]
        rst_diff_gpudata = [subarray.gpudata for subarray in rst_diff]

        gpu_diffmats = self.gpu_diffmats(op_class, elgroup)

        if discr.instrumented:
            discr.diff_op_timer.add_timer_callable(
                func.prepared_timed_call(self.grid, gpu_diffmats.device_memory,
                                         *rst_diff_gpudata))

            from pytools import product
            discr.gmem_bytes_diff.add(given.float_size() * (
                # matrix fetch
                gpu_diffmats.block_floats * product(self.grid)
                # field fetch
                + given.dofs_per_el() * given.dofs_per_el() *
                given.microblock.elements * self.grid[1] *
                self.plan.parallelism.total()
                # field store
                + len(discr.nodes)))
        else:
            func.prepared_call(self.grid, gpu_diffmats.device_memory,
                               *rst_diff_gpudata)

        if False:
            copied_debugbuf = debugbuf.get()
            print "DEBUG"
            #print numpy.reshape(copied_debugbuf, (len(copied_debugbuf)//16, 16))
            print copied_debugbuf[:100].reshape((10, 10))
            raw_input()

        return rst_diff
Example No. 27
    def diagonal(self, *, get_data=True):
        no_trace_tensors = [
            basis.computational_basis_vectors for basis in self.bases
        ]

        trace_argument = []
        n_qubits = self.n_qubits
        for i, ntt in enumerate(no_trace_tensors):
            trace_argument.append(ntt)
            trace_argument.append([i + n_qubits, i])

        indices = list(range(n_qubits))
        out_indices = list(range(n_qubits, 2 * n_qubits))
        complex_dm_dimension = pytools.product(self.dim_hilbert)
        return np.einsum(self._data,
                         indices,
                         *trace_argument,
                         out_indices,
                         optimize=True).real.reshape(complex_dm_dimension)
Example No. 28
    def map_product(self, expr):
        from pymbolic.primitives import is_constant
        const = []
        nonconst = []
        for subexpr in expr.children:
            if is_constant(subexpr):
                const.append(subexpr)
            else:
                nonconst.append(subexpr)

        if len(nonconst) > 1:
            raise RuntimeError("DerivativeTaker doesn't support products with "
                               "more than one non-constant")

        if not nonconst:
            nonconst = [1]

        from pytools import product
        return product(const) * self.rec(nonconst[0])
Example No. 29
    def _ensure_gpu_array_shape(self, arr, shape):
        new_size = pytools.product(shape)
        new_size_bytes = new_size * 8
        if arr.gpudata.size < new_size_bytes:
            # reallocate
            try:
                arr.gpudata.free()
                out = ga.empty(shape, np.float64)
                out.gpudata.size = out.nbytes
            except Exception as ex:
                raise RuntimeError(
                    f"Could not allocate a GPU array of shape {shape} "
                    f"and size {new_size_bytes} bytes") from ex
        else:
            # reallocation not required:
            # reshape, but reuse the existing allocation
            out = ga.GPUArray(
                shape=shape,
                dtype=np.float64,
                gpudata=arr.gpudata,
            )
        return out
Example No. 30
    def get_rho_distrib(self):
        z_func = self.next.get_rho_distrib()
        z_count = self.next.count_axes()[0]
        if self.axis_first:
            z_slice = slice(0, z_count)
            my_slice = slice(z_count, None)
        else:
            z_slice = slice(len(self.radii), None)
            my_slice = slice(0, len(self.center))

        n = len(self.radii)
        from math import pi
        from pyrticle._internal import gamma
        from pytools import product
        distr_vol = 2 * pi**(n/2) \
                / (gamma(n/2)*n) \
                * product(self.radii)

        if n == 2:
            normalization = 1/distr_vol
            def f(x, el):
                if la.norm((x[my_slice]-self.center)/self.radii) <= 1:
                    return normalization*z_func(x[z_slice], el)
                else:
                    return 0
        elif n == 1:
            normalization = 2/(pi*distr_vol)
            def f(x, el):
                normx = la.norm((x[my_slice]-self.center)/self.radii)
                if normx <= 1:
                    return normalization\
                            *z_func(x[z_slice], el)\
                            *(1-normx**2)**-0.5
                else:
                    return 0
        else:
            raise ValueError("invalid dimension for KV")

        return f
Example No. 31
def generate_linearized_array(array, value):
    from pytools import product
    size = product(shape_ax for shape_ax in array.shape)

    if not isinstance(size, int):
        raise LoopyError("cannot produce literal for array '%s': "
                "shape is not a compile-time constant"
                % array.name)

    strides = []

    data = np.zeros(size, array.dtype.numpy_dtype)

    from loopy.kernel.array import FixedStrideArrayDimTag
    for i, dim_tag in enumerate(array.dim_tags):
        if isinstance(dim_tag, FixedStrideArrayDimTag):

            if not isinstance(dim_tag.stride, int):
                raise LoopyError("cannot produce literal for array '%s': "
                        "stride along axis %d (1-based) is not a "
                        "compile-time constant"
                        % (array.name, i+1))

            strides.append(dim_tag.stride)

        else:
            raise LoopyError("cannot produce literal for array '%s': "
                    "dim_tag type '%s' not supported"
                    % (array.name, type(dim_tag).__name__))

    assert array.offset == 0

    from pytools import indices_in_shape
    for ituple in indices_in_shape(value.shape):
        i = sum(i_ax * strd_ax for i_ax, strd_ax in zip(ituple, strides))
        data[i] = value[ituple]

    return data
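The linearization rule used above, standalone: with C-order strides built as trailing-shape products, sum(i_ax * stride_ax) recovers the flat index of np.ravel order (the shape below is illustrative):

import numpy as np
from pytools import indices_in_shape, product

shape = (2, 3, 4)
strides = [product(shape[ax + 1:]) for ax in range(len(shape))]   # (12, 4, 1)

ref = np.arange(product(shape)).reshape(shape)   # value == flat index in C order
for ituple in indices_in_shape(shape):
    i = sum(i_ax * strd_ax for i_ax, strd_ax in zip(ituple, strides))
    assert ref[ituple] == i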
Example No. 32
def generate_linearized_array(array, value):
    from pytools import product
    size = product(shape_ax for shape_ax in array.shape)

    if not isinstance(size, int):
        raise LoopyError("cannot produce literal for array '%s': "
                "shape is not a compile-time constant"
                % array.name)

    strides = []

    data = np.zeros(size, array.dtype.numpy_dtype)

    from loopy.kernel.array import FixedStrideArrayDimTag
    for i, dim_tag in enumerate(array.dim_tags):
        if isinstance(dim_tag, FixedStrideArrayDimTag):

            if not isinstance(dim_tag.stride, int):
                raise LoopyError("cannot produce literal for array '%s': "
                        "stride along axis %d (1-based) is not a "
                        "compile-time constant"
                        % (array.name, i+1))

            strides.append(dim_tag.stride)

        else:
            raise LoopyError("cannot produce literal for array '%s': "
                    "dim_tag type '%s' not supported"
                    % (array.name, type(dim_tag).__name__))

    assert array.offset == 0

    from pytools import indices_in_shape
    for ituple in indices_in_shape(value.shape):
        i = sum(i_ax * strd_ax for i_ax, strd_ax in zip(ituple, strides))
        data[i] = value[ituple]

    return data
Example No. 33
    def map_parametrization_derivative(self, expr):
        discr = self.discr_dict[expr.where]

        from pytential.qbx import LayerPotentialSource
        if isinstance(discr, LayerPotentialSource):
            discr = discr.fine_density_discr

        from meshmode.discretization import Discretization
        if not isinstance(discr, Discretization):
            raise RuntimeError("Cannot compute the parametrization derivative "
                    "of something that is not a discretization (a target perhaps?). "
                    "For example, you will receive this error if you try to "
                    "evaluate S' in the volume.")

        par_grad = np.zeros((discr.ambient_dim, discr.dim), object)
        for i in range(discr.ambient_dim):
            for j in range(discr.dim):
                par_grad[i, j] = prim.NumReferenceDerivative(
                        frozenset([j]),
                        prim.NodeCoordinateComponent(i, expr.where),
                        expr.where)

        from pytools import product
        return product(MultiVector(vec) for vec in par_grad.T)
Example No. 34
def generate_box_mesh(axis_coords, order=1, coord_dtype=np.float64):
    """Create a semi-structured mesh.

    :param axis_coords: a tuple with a number of entries corresponding
        to the number of dimensions, with each entry a numpy array
        specifying the coordinates to be used along that axis.
    """

    for iaxis, axc in enumerate(axis_coords):
        if len(axc) < 2:
            raise ValueError("need at least two points along axis %d"
                    % (iaxis+1))

    dim = len(axis_coords)

    shape = tuple(len(axc) for axc in axis_coords)

    from pytools import product
    nvertices = product(shape)

    vertex_indices = np.arange(nvertices).reshape(*shape, order="F")

    vertices = np.empty((dim,)+shape, dtype=coord_dtype)
    for idim in range(dim):
        vshape = (shape[idim],) + (1,)*idim
        vertices[idim] = axis_coords[idim].reshape(*vshape)

    vertices = vertices.reshape(dim, -1)

    el_vertices = []

    if dim == 1:
        for i in range(shape[0]-1):
            # a--b

            a = vertex_indices[i]
            b = vertex_indices[i+1]

            el_vertices.append((a, b,))

    elif dim == 2:
        for i in range(shape[0]-1):
            for j in range(shape[1]-1):

                # c--d
                # |  |
                # a--b

                a = vertex_indices[i, j]
                b = vertex_indices[i+1, j]
                c = vertex_indices[i, j+1]
                d = vertex_indices[i+1, j+1]

                el_vertices.append((a, b, c))
                el_vertices.append((d, c, b))

    elif dim == 3:
        for i in range(shape[0]-1):
            for j in range(shape[1]-1):
                for k in range(shape[2]-1):

                    a000 = vertex_indices[i, j, k]
                    a001 = vertex_indices[i, j, k+1]
                    a010 = vertex_indices[i, j+1, k]
                    a011 = vertex_indices[i, j+1, k+1]

                    a100 = vertex_indices[i+1, j, k]
                    a101 = vertex_indices[i+1, j, k+1]
                    a110 = vertex_indices[i+1, j+1, k]
                    a111 = vertex_indices[i+1, j+1, k+1]

                    el_vertices.append((a000, a100, a010, a001))
                    el_vertices.append((a101, a100, a001, a010))
                    el_vertices.append((a101, a011, a010, a001))

                    el_vertices.append((a100, a010, a101, a110))
                    el_vertices.append((a011, a010, a110, a101))
                    el_vertices.append((a011, a111, a101, a110))

    else:
        raise NotImplementedError("box meshes of dimension %d"
                % dim)

    el_vertices = np.array(el_vertices, dtype=np.int32)

    grp = make_group_from_vertices(
            vertices.reshape(dim, -1), el_vertices, order)

    from meshmode.mesh import Mesh
    return Mesh(vertices, [grp],
            nodal_adjacency=None,
            facial_adjacency_groups=None)
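A minimal usage sketch through the public meshmode entry point (this assumes a meshmode installation; the function above is an older variant of that generator, and the attribute access below reflects the current API):

import numpy as np
from meshmode.mesh.generation import generate_box_mesh

mesh = generate_box_mesh((
    np.linspace(0.0, 1.0, 5),   # 5 points along x
    np.linspace(0.0, 2.0, 3),   # 3 points along y
))
print(mesh.vertices.shape)      # (2, 15): coordinates of 5 * 3 vertices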
Example No. 35
    def f(x, el):
        return product(f(x[sl], el) for f, sl in funcs_and_slices)
Example No. 36
def adjust_local_temp_var_storage(kernel, device):
    import pyopencl as cl
    import pyopencl.characterize as cl_char

    logger.debug("%s: adjust temp var storage" % kernel.name)

    new_temp_vars = {}

    from loopy.kernel.data import temp_var_scope

    lmem_size = cl_char.usable_local_mem_size(device)
    for temp_var in six.itervalues(kernel.temporary_variables):
        if temp_var.scope != temp_var_scope.LOCAL:
            new_temp_vars[temp_var.name] = \
                    temp_var.copy(storage_shape=temp_var.shape)
            continue

        other_loctemp_nbytes = [
                tv.nbytes
                for tv in six.itervalues(kernel.temporary_variables)
                if tv.scope == temp_var_scope.LOCAL
                and tv.name != temp_var.name]

        storage_shape = temp_var.storage_shape

        if storage_shape is None:
            storage_shape = temp_var.shape

        storage_shape = list(storage_shape)

        # sizes of all dims except the last one, which we may change
        # below to avoid bank conflicts
        from pytools import product

        if device.local_mem_type == cl.device_local_mem_type.GLOBAL:
            # FIXME: could try to avoid cache associativity disasters
            new_storage_shape = storage_shape

        elif device.local_mem_type == cl.device_local_mem_type.LOCAL:
            min_mult = cl_char.local_memory_bank_count(device)
            good_incr = None
            new_storage_shape = storage_shape
            min_why_not = None

            for increment in range(storage_shape[-1]//2):

                test_storage_shape = storage_shape[:]
                test_storage_shape[-1] = test_storage_shape[-1] + increment
                new_mult, why_not = cl_char.why_not_local_access_conflict_free(
                        device, temp_var.dtype.itemsize,
                        temp_var.shape, test_storage_shape)

                # will choose smallest increment 'automatically'
                if new_mult < min_mult:
                    new_lmem_use = (sum(other_loctemp_nbytes)
                            + temp_var.dtype.itemsize*product(test_storage_shape))
                    if new_lmem_use < lmem_size:
                        new_storage_shape = test_storage_shape
                        min_mult = new_mult
                        min_why_not = why_not
                        good_incr = increment

            if min_mult != 1:
                from warnings import warn
                from loopy.diagnostic import LoopyAdvisory
                warn("could not find a conflict-free mem layout "
                        "for local variable '%s' "
                        "(currently: %dx conflict, increment: %s, reason: %s)"
                        % (temp_var.name, min_mult, good_incr, min_why_not),
                        LoopyAdvisory)
        else:
            from warnings import warn
            warn("unknown type of local memory")

            new_storage_shape = storage_shape

        new_temp_vars[temp_var.name] = temp_var.copy(storage_shape=new_storage_shape)

    return kernel.copy(temporary_variables=new_temp_vars)
Example No. 37
    def __len__(self):
        return pytools.product(self._Dimensions)
Example No. 38
    def _apply_two_qubit_ptm(self, qubit0, qubit1, ptm):
        """Apply a two-qubit Pauli transfer matrix to qubit `bit0` and `bit1`.

        Parameters
        ----------
        ptm: array-like
            A two-qubit ptm in the basis of `bit0` and `bit1`. Must be a 4D
            matrix with dimensions, that correspond to the qubits.
        qubit1 : int
            Index of first qubit
        qubit0: int
            Index of second qubit
        """
        self._validate_qubit(qubit1, 'qubit0')
        self._validate_qubit(qubit0, 'qubit1')
        if len(ptm.shape) != 4:
            raise ValueError("`ptm` must be a 4D array, got {}D".format(
                len(ptm.shape)))

        # qubit0 must be the more significant qubit (qubit 0 is msb)
        if qubit0 > qubit1:
            qubit0, qubit1 = qubit1, qubit0
            ptm = np.einsum("abcd -> badc", ptm)

        new_shape = list(self._data.shape)
        dim0_out, dim1_out, dim0_in, dim1_in = ptm.shape
        assert new_shape[qubit1] == dim1_in
        assert new_shape[qubit0] == dim0_in
        new_shape[qubit1] = dim1_out
        new_shape[qubit0] = dim0_out
        new_size = pytools.product(new_shape)
        new_size_bytes = new_size * 8

        if self._work_data.gpudata.size < new_size_bytes:
            # reallocate
            self._work_data.gpudata.free()
            self._work_data = ga.empty(new_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        else:
            # reallocation not required,
            # reshape but reuse allocation
            self._work_data = ga.GPUArray(
                shape=new_shape,
                dtype=np.float64,
                gpudata=self._work_data.gpudata,
            )

        ptm_gpu = self._cached_gpuarray(ptm)

        rest_shape = new_shape.copy()
        rest_shape[qubit1] = 1
        rest_shape[qubit0] = 1

        dint = 1
        for i in sorted(rest_shape):
            if i * dint > 256 // (dim0_out * dim1_out):
                break
            else:
                dint *= i

        # dim_a_out, dim_b_out, d_internal (arbitrary)
        block = (dim0_out, dim1_out, dint)
        blocksize = dim1_out * dim0_out * dint
        sh_mem_size = dint * dim1_in * dim0_in  # + ptm.size
        grid_size = max(1, (new_size - 1) // blocksize + 1)
        grid = (grid_size, 1, 1)

        dim_z = pytools.product(self._data.shape[qubit1 + 1:])
        dim_y = pytools.product(self._data.shape[qubit0 + 1:qubit1])
        dim_rho = new_size  # self.data.size

        _two_qubit_general_ptm.prepared_call(grid,
                                             block,
                                             self._data.gpudata,
                                             self._work_data.gpudata,
                                             ptm_gpu.gpudata,
                                             dim0_in,
                                             dim1_in,
                                             dim_z,
                                             dim_y,
                                             dim_rho,
                                             shared_size=8 * sh_mem_size)

        self._data, self._work_data = self._work_data, self._data
Example No. 39
    def __call__(self, in_vector, prepped_mat, prepped_scaling, out_vector=None):
        discr = self.discr
        elgroup, = discr.element_groups
        given = self.plan.given

        kernel, in_vector_texref, scaling_texref = \
                self.get_kernel(prepped_scaling is not None)

        if out_vector is None:
            out_vector = discr.volume_empty()

        in_vector.bind_to_texref_ext(in_vector_texref, allow_double_hack=True)
        if prepped_scaling is not None:
            prepped_scaling.bind_to_texref_ext(scaling_texref,
                    allow_double_hack=True)

        if set([self.plan.debug_name, "cuda_debugbuf"]) <= discr.debug:
            debugbuf = gpuarray.zeros((1024,), dtype=given.float_type)
        else:
            debugbuf = FakeGPUArray()

        if discr.instrumented:
            discr.el_local_timer.add_timer_callable(
                    kernel.prepared_timed_call(
                        self.grid,
                        out_vector.gpudata,
                        prepped_mat,
                        debugbuf.gpudata,
                        len(discr.blocks)*given.microblocks_per_block,
                        ))

            from pytools import product
            discr.gmem_bytes_el_local.add(
                    given.float_size()
                    * (
                        # matrix fetch
                        self.plan.gpu_matrix_block_floats() * product(self.grid)
                        # field fetch
                        + self.plan.preimage_dofs_per_el
                        * given.dofs_per_el() * given.microblock.elements
                        * self.grid[1] * self.plan.parallelism.total()
                        # field store
                        + len(discr.nodes)
                        ))
        else:
            kernel.prepared_call(
                    self.grid,
                    out_vector.gpudata,
                    prepped_mat,
                    debugbuf.gpudata,
                    len(discr.blocks)*given.microblocks_per_block,
                    )

        if set([self.plan.debug_name, "cuda_debugbuf"]) <= discr.debug:
            copied_debugbuf = debugbuf.get()[:144*7].reshape((144,7))
            print "DEBUG"
            numpy.set_printoptions(linewidth=100)
            copied_debugbuf.shape = (144,7)
            numpy.set_printoptions(threshold=3000)

            print copied_debugbuf
            raw_input()

        return out_vector
Example No. 40
def generate_box_mesh(axis_coords,
                      order=1,
                      coord_dtype=np.float64,
                      group_cls=None,
                      boundary_tag_to_face=None,
                      mesh_type=None):
    r"""Create a semi-structured mesh.

    :param axis_coords: a tuple with a number of entries corresponding
        to the number of dimensions, with each entry a numpy array
        specifying the coordinates to be used along that axis.
    :param group_cls: One of :class:`meshmode.mesh.SimplexElementGroup`
        or :class:`meshmode.mesh.TensorProductElementGroup`.
    :param boundary_tag_to_face: an optional dictionary for tagging boundaries.
        The keys correspond to custom boundary tags, with the values giving
        a list of the faces on which they should be applied in terms of coordinate
        directions (``+x``, ``-x``, ``+y``, ``-y``, ``+z``, ``-z``, ``+w``, ``-w``).

        For example::

            boundary_tag_to_face={"bdry_1": ["+x", "+y"], "bdry_2": ["-x"]}
    :param mesh_type: In two dimensions with non-tensor-product elements,
        *mesh_type* may be set to ``"X"`` to generate this type
        of mesh::

            _______
            |\   /|
            | \ / |
            |  X  |
            | / \ |
            |/   \|
            ^^^^^^^

        instead of the default::

            _______
            |\    |
            | \   |
            |  \  |
            |   \ |
            |    \|
            ^^^^^^^

        Specifying a value other than *None* for all other mesh
        dimensionalities and element types is an error.

    .. versionchanged:: 2017.1

        *group_factory* parameter added.

    .. versionchanged:: 2020.1

        *boundary_tag_to_face* parameter added.

    .. versionchanged:: 2020.3

        *group_factory* deprecated and renamed to *group_cls*.
    """

    if boundary_tag_to_face is None:
        boundary_tag_to_face = {}

    for iaxis, axc in enumerate(axis_coords):
        if len(axc) < 2:
            raise ValueError("need at least two points along axis %d" %
                             (iaxis + 1))

    dim = len(axis_coords)

    shape = tuple(len(axc) for axc in axis_coords)

    from pytools import product
    nvertices = product(shape)

    vertex_indices = np.arange(nvertices).reshape(*shape)

    vertices = np.empty((dim, ) + shape, dtype=coord_dtype)
    for idim in range(dim):
        vshape = (shape[idim], ) + (1, ) * (dim - 1 - idim)
        vertices[idim] = axis_coords[idim].reshape(*vshape)

    vertices = vertices.reshape(dim, -1)

    from meshmode.mesh import SimplexElementGroup, TensorProductElementGroup
    if group_cls is None:
        group_cls = SimplexElementGroup

    if issubclass(group_cls, SimplexElementGroup):
        is_tp = False
    elif issubclass(group_cls, TensorProductElementGroup):
        is_tp = True
    else:
        raise ValueError(f"unsupported value for 'group_cls': {group_cls}")

    el_vertices = []

    if dim == 1:
        if mesh_type is not None:
            raise ValueError(f"unsupported mesh type: '{mesh_type}'")

        for i in range(shape[0] - 1):
            # a--b

            a = vertex_indices[i]
            b = vertex_indices[i + 1]

            el_vertices.append((
                a,
                b,
            ))

    elif dim == 2:
        if mesh_type == "X" and not is_tp:
            shape_m1 = tuple(si - 1 for si in shape)

            nmidpoints = product(shape_m1)
            midpoint_indices = (
                nvertices +
                np.arange(nmidpoints).reshape(*shape_m1, order="F"))

            midpoints = np.empty((dim, ) + shape_m1, dtype=coord_dtype)
            for idim in range(dim):
                vshape = (shape_m1[idim], ) + (1, ) * idim
                left_axis_coords = axis_coords[idim][:-1]
                right_axis_coords = axis_coords[idim][1:]
                midpoints[idim] = (
                    0.5 *
                    (left_axis_coords + right_axis_coords)).reshape(*vshape)

            midpoints = midpoints.reshape(dim, -1)
            vertices = np.concatenate((vertices, midpoints), axis=1)

        elif mesh_type is None:
            pass

        else:
            raise ValueError(f"unsupported mesh type: '{mesh_type}'")

        for i in range(shape[0] - 1):
            for j in range(shape[1] - 1):

                # c--d
                # |  |
                # a--b

                a = vertex_indices[i, j]
                b = vertex_indices[i + 1, j]
                c = vertex_indices[i, j + 1]
                d = vertex_indices[i + 1, j + 1]

                if is_tp:
                    el_vertices.append((a, b, c, d))

                elif mesh_type == "X":
                    m = midpoint_indices[i, j]
                    el_vertices.append((a, b, m))
                    el_vertices.append((b, d, m))
                    el_vertices.append((d, c, m))
                    el_vertices.append((c, a, m))

                else:
                    el_vertices.append((a, b, c))
                    el_vertices.append((d, c, b))

    elif dim == 3:
        if mesh_type is not None:
            raise ValueError("unsupported mesh_type")

        for i in range(shape[0] - 1):
            for j in range(shape[1] - 1):
                for k in range(shape[2] - 1):

                    a000 = vertex_indices[i, j, k]
                    a001 = vertex_indices[i, j, k + 1]
                    a010 = vertex_indices[i, j + 1, k]
                    a011 = vertex_indices[i, j + 1, k + 1]

                    a100 = vertex_indices[i + 1, j, k]
                    a101 = vertex_indices[i + 1, j, k + 1]
                    a110 = vertex_indices[i + 1, j + 1, k]
                    a111 = vertex_indices[i + 1, j + 1, k + 1]

                    if is_tp:
                        el_vertices.append(
                            (a000, a100, a010, a110, a001, a101, a011, a111))

                    else:
                        el_vertices.append((a000, a100, a010, a001))
                        el_vertices.append((a101, a100, a001, a010))
                        el_vertices.append((a101, a011, a010, a001))

                        el_vertices.append((a100, a010, a101, a110))
                        el_vertices.append((a011, a010, a110, a101))
                        el_vertices.append((a011, a111, a101, a110))

    else:
        raise NotImplementedError("box meshes of dimension %d" % dim)

    el_vertices = np.array(el_vertices, dtype=np.int32)

    grp = make_group_from_vertices(vertices.reshape(dim, -1),
                                   el_vertices,
                                   order,
                                   group_cls=group_cls)

    # {{{ compute facial adjacency for mesh if there is tag information

    facial_adjacency_groups = None
    face_vertex_indices_to_tags = {}
    boundary_tags = list(boundary_tag_to_face.keys())
    axes = ["x", "y", "z", "w"]

    if boundary_tags:
        vert_index_to_tuple = {
            vertex_indices[itup]: itup
            for itup in np.ndindex(shape)
        }

    for tag_idx, tag in enumerate(boundary_tags):
        # Need to map the correct face vertices to the boundary tags
        for face in boundary_tag_to_face[tag]:
            if len(face) != 2:
                raise ValueError("face identifier '%s' does not "
                                 "consist of exactly two characters" % face)

            side, axis = face
            try:
                axis = axes.index(axis)
            except ValueError:
                raise ValueError("unrecognized axis in face identifier '%s'" %
                                 face)
            if axis >= dim:
                raise ValueError(
                    "axis in face identifier '%s' does not exist in %dD" %
                    (face, dim))

            if side == "-":
                vert_crit = 0
            elif side == "+":
                vert_crit = shape[axis] - 1
            else:
                raise ValueError(
                    "first character of face identifier '%s' is not "
                    "'+' or '-'" % face)

            for ielem in range(0, grp.nelements):
                for ref_fvi in grp.face_vertex_indices():
                    fvi = grp.vertex_indices[ielem, ref_fvi]
                    try:
                        fvi_tuples = [vert_index_to_tuple[i] for i in fvi]
                    except KeyError:
                        # Happens for interior faces of "X" meshes because
                        # midpoints aren't in vert_index_to_tuple. We don't
                        # care about them.
                        continue

                    if all(fvi_tuple[axis] == vert_crit
                           for fvi_tuple in fvi_tuples):
                        key = frozenset(fvi)
                        face_vertex_indices_to_tags.setdefault(key,
                                                               []).append(tag)

    if boundary_tags:
        from meshmode.mesh import (_compute_facial_adjacency_from_vertices,
                                   BTAG_ALL, BTAG_REALLY_ALL)
        boundary_tags.extend([BTAG_ALL, BTAG_REALLY_ALL])
        facial_adjacency_groups = _compute_facial_adjacency_from_vertices(
            [grp], boundary_tags, np.int32, np.int8,
            face_vertex_indices_to_tags)
    else:
        facial_adjacency_groups = None

    # }}}

    from meshmode.mesh import Mesh
    return Mesh(vertices, [grp],
                facial_adjacency_groups=facial_adjacency_groups,
                is_conforming=True,
                boundary_tags=boundary_tags)
Example No. 41
    def _apply_single_qubit_ptm(self, qubit, ptm):
        # noinspection PyUnresolvedReferences
        """Apply a one-qubit Pauli transfer matrix to qubit bit.

        Parameters
        ----------
        qubit: int
            Qubit index
        ptm: array-like
            A PTM in the basis of a qubit.
        basis_out: quantumsim.bases.PauliBasis or None
            If provided, will convert qubit basis to specified
            after the PTM application.
        """
        new_shape = list(self._data.shape)
        self._validate_qubit(qubit, 'bit')

        # TODO Refactor to use self._validate_ptm
        if len(ptm.shape) != 2:
            raise ValueError(
                "`ptm` must be a 2D array, got {}D".format(len(ptm.shape)))

        dim_bit_out, dim_bit_in = ptm.shape
        assert new_shape[qubit] == dim_bit_in
        new_shape[qubit] = dim_bit_out
        new_size = pytools.product(new_shape)
        new_size_bytes = new_size * 8

        if self._work_data.gpudata.size < new_size_bytes:
            # reallocate
            self._work_data.gpudata.free()
            self._work_data = ga.empty(new_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        else:
            # reallocation not required,
            # reshape but reuse allocation
            self._work_data = ga.GPUArray(
                shape=new_shape,
                dtype=np.float64,
                gpudata=self._work_data.gpudata,
            )

        ptm_gpu = self._cached_gpuarray(ptm)

        dint = min(64, self._data.size // dim_bit_in)
        block = (1, dim_bit_out, dint)
        blocksize = dim_bit_out * dint
        grid_size = max(1, (new_size - 1) // blocksize + 1)
        grid = (grid_size, 1, 1)

        dim_z = pytools.product(self._data.shape[qubit + 1:])
        dim_y = pytools.product(self._data.shape[:qubit])
        dim_rho = new_size  # self.data.size

        _two_qubit_general_ptm.prepared_call(
            grid,
            block,
            self._data.gpudata,
            self._work_data.gpudata,
            ptm_gpu.gpudata,
            1, dim_bit_in,
            dim_z,
            dim_y,
            dim_rho,
            shared_size=8 * (ptm.size + blocksize))

        self._data, self._work_data = self._work_data, self._data
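
The launch above is, conceptually, just a tensor contraction of the PTM with one axis of the Pauli-basis state tensor. A minimal NumPy reference of the same operation (a hypothetical helper for illustration, not part of quantumsim)::

    import numpy as np

    def apply_single_qubit_ptm_ref(state, ptm, qubit):
        # Contract the (dim_out, dim_in) PTM with axis `qubit` of the state
        # tensor; tensordot puts the contracted result axis first, so move
        # it back into place afterwards.
        out = np.tensordot(ptm, state, axes=([1], [qubit]))
        return np.moveaxis(out, 0, qubit)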
Exemplo n.º 42
    def diagonal(self, *, get_data=True, target_array=None, flatten=True):
        """Obtain the diagonal of the density matrix.

        Parameters
        ----------
        target_array : None or pycuda.gpuarray.array
            An already-allocated GPU array to which the data will be copied.
            If `None`, make a new GPU array.
        get_data : boolean
            Whether the data should be copied from the GPU.
        flatten : boolean
            If True, return the diagonal as a flat 1D array; otherwise
            reshape it to one axis per qubit.
        """
        diag_bases = [pb.computational_subbasis() for pb in self.bases]
        diag_shape = [db.dim_pauli for db in diag_bases]
        diag_size = pytools.product(diag_shape)

        if target_array is None:
            if self._work_data.gpudata.size < diag_size * 8:
                self._work_data.gpudata.free()
                self._work_data = ga.empty(diag_shape, np.float64)
                self._work_data.gpudata.size = self._work_data.nbytes
            target_array = self._work_data
        else:
            if target_array.size < diag_size:
                raise ValueError(
                    "Size of `target_gpu_array` is too small ({}).\n"
                    "Should be at least {}."
                    .format(target_array.size, diag_size))

        idx = [[pb.computational_basis_indices[i]
                for i in range(pb.dim_hilbert)
                if pb.computational_basis_indices[i] is not None]
               for pb in self.bases]

        idx_j = np.array(list(pytools.flatten(idx))).astype(np.uint32)
        idx_i = np.cumsum([0] + [len(i) for i in idx][:-1]).astype(np.uint32)

        xshape = np.array(self._data.shape, np.uint32)
        yshape = np.array(diag_shape, np.uint32)

        xshape_gpu = self._cached_gpuarray(xshape)
        yshape_gpu = self._cached_gpuarray(yshape)

        idx_i_gpu = self._cached_gpuarray(idx_i)
        idx_j_gpu = self._cached_gpuarray(idx_j)

        block = (2 ** 8, 1, 1)
        grid = (max(1, (diag_size - 1) // 2 ** 8 + 1), 1, 1)

        if len(yshape) == 0:
            # Degenerate case (no qubit axes), but it should still be handled.
            target_array.set(self._data.get())
        else:
            _multitake.prepared_call(
                grid, block, self._data.gpudata, target_array.gpudata,
                idx_i_gpu.gpudata, idx_j_gpu.gpudata,
                xshape_gpu.gpudata, yshape_gpu.gpudata,
                np.uint32(len(yshape))
            )

        if get_data:
            if flatten:
                return target_array.get().ravel()[:diag_size]
            else:
                return (target_array.get().ravel()[:diag_size]
                        .reshape(diag_shape))
        else:
            return ga.GPUArray(shape=diag_shape,
                               gpudata=target_array.gpudata,
                               dtype=np.float64)
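
Conceptually, the `_multitake` call gathers, along every qubit axis, only those Pauli-basis elements that belong to the computational sub-basis. A rough NumPy equivalent (hypothetical helper, for illustration only)::

    import numpy as np

    def diagonal_ref(state, comp_indices):
        # comp_indices holds, per qubit axis, the positions of the
        # computational sub-basis elements, e.g. [[0, 3], [0, 3]].
        out = state
        for axis, idx in enumerate(comp_indices):
            out = np.take(out, idx, axis=axis)
        return out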
Exemplo n.º 43
    def size(self):
        return pytools.product(self.dim_hilbert) ** 2
Exemplo n.º 44
    def _apply_two_qubit_ptm(self, qubit0, qubit1, ptm):
        """Apply a two-qubit Pauli transfer matrix to qubit `bit0` and `bit1`.

        Parameters
        ----------
        ptm: array-like
            A two-qubit ptm in the basis of `bit0` and `bit1`. Must be a 4D
            matrix with dimensions, that correspond to the qubits.
        qubit1 : int
            Index of first qubit
        qubit0: int
            Index of second qubit
        """
        self._validate_qubit(qubit1, 'qubit0')
        self._validate_qubit(qubit0, 'qubit1')
        if len(ptm.shape) != 4:
            raise ValueError(
                "`ptm` must be a 4D array, got {}D".format(len(ptm.shape)))

        # qubit0 must be the more significant qubit (qubit 0 is the msb)
        if qubit0 > qubit1:
            qubit0, qubit1 = qubit1, qubit0
            ptm = np.einsum("abcd -> badc", ptm)

        new_shape = list(self._data.shape)
        dim0_out, dim1_out, dim0_in, dim1_in = ptm.shape
        assert new_shape[qubit1] == dim1_in
        assert new_shape[qubit0] == dim0_in
        new_shape[qubit1] = dim1_out
        new_shape[qubit0] = dim0_out
        new_size = pytools.product(new_shape)
        new_size_bytes = new_size * 8

        if self._work_data.gpudata.size < new_size_bytes:
            # reallocate
            self._work_data.gpudata.free()
            self._work_data = ga.empty(new_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        else:
            # reallocation not required,
            # reshape but reuse allocation
            self._work_data = ga.GPUArray(
                shape=new_shape,
                dtype=np.float64,
                gpudata=self._work_data.gpudata,
            )

        ptm_gpu = self._cached_gpuarray(ptm)

        rest_shape = new_shape.copy()
        rest_shape[qubit1] = 1
        rest_shape[qubit0] = 1

        dint = 1
        for i in sorted(rest_shape):
            if i * dint > 256 // (dim0_out * dim1_out):
                break
            else:
                dint *= i

        # dim_a_out, dim_b_out, d_internal (arbitrary)
        block = (dim0_out, dim1_out, dint)
        blocksize = dim1_out * dim0_out * dint
        sh_mem_size = dint * dim1_in * dim0_in  # + ptm.size
        grid_size = max(1, (new_size - 1) // blocksize + 1)
        grid = (grid_size, 1, 1)

        dim_z = pytools.product(self._data.shape[qubit1 + 1:])
        dim_y = pytools.product(self._data.shape[qubit0 + 1:qubit1])
        dim_rho = new_size  # self.data.size

        _two_qubit_general_ptm.prepared_call(
            grid,
            block,
            self._data.gpudata,
            self._work_data.gpudata,
            ptm_gpu.gpudata,
            dim0_in, dim1_in,
            dim_z,
            dim_y,
            dim_rho,
            shared_size=8 * sh_mem_size)

        self._data, self._work_data = self._work_data, self._data
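
As in the single-qubit case, the kernel call amounts to contracting the 4D PTM with the two qubit axes of the state tensor. A NumPy sketch of the same contraction (hypothetical helper, not the library's API)::

    import numpy as np

    def apply_two_qubit_ptm_ref(state, ptm, qubit0, qubit1):
        # ptm has shape (dim0_out, dim1_out, dim0_in, dim1_in); contract its
        # input axes with the two qubit axes, then restore the axis order.
        out = np.tensordot(ptm, state, axes=([2, 3], [qubit0, qubit1]))
        return np.moveaxis(out, [0, 1], [qubit0, qubit1])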
Exemplo n.º 45
    def size(self):
        return pytools.product(self.dim_hilbert)**2
Exemplo n.º 46
    def nbytes(self):
        from pytools import product

        return product(si for si in self.shape) * self.dtype.itemsize
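
As a quick sanity check, the product-of-shape formula agrees with NumPy's own byte count for a plain contiguous array::

    import numpy as np
    from pytools import product

    shape, dtype = (3, 4, 5), np.dtype(np.float64)
    assert (product(si for si in shape) * dtype.itemsize
            == np.empty(shape, dtype).nbytes)  # 480 bytes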
Exemplo n.º 47
def generate_box_mesh(axis_coords, order=1, coord_dtype=np.float64,
        group_factory=None):
    """Create a semi-structured mesh.

    :param axis_coords: a tuple with a number of entries corresponding
        to the number of dimensions, with each entry a numpy array
        specifying the coordinates to be used along that axis.
    :param group_factory: One of :class:`meshmode.mesh.SimplexElementGroup`
        or :class:`meshmode.mesh.TensorProductElementGroup`.

    .. versionchanged:: 2017.1

        *group_factory* parameter added.
    """

    for iaxis, axc in enumerate(axis_coords):
        if len(axc) < 2:
            raise ValueError("need at least two points along axis %d"
                    % (iaxis+1))

    dim = len(axis_coords)

    shape = tuple(len(axc) for axc in axis_coords)

    from pytools import product
    nvertices = product(shape)

    vertex_indices = np.arange(nvertices).reshape(*shape, order="F")

    vertices = np.empty((dim,)+shape, dtype=coord_dtype)
    for idim in range(dim):
        vshape = (shape[idim],) + (1,)*idim
        vertices[idim] = axis_coords[idim].reshape(*vshape)

    vertices = vertices.reshape(dim, -1)

    from meshmode.mesh import SimplexElementGroup, TensorProductElementGroup
    if group_factory is None:
        group_factory = SimplexElementGroup

    if issubclass(group_factory, SimplexElementGroup):
        is_tp = False
    elif issubclass(group_factory, TensorProductElementGroup):
        is_tp = True
    else:
        raise ValueError("unsupported value for 'group_factory': %s"
                % group_factory)

    el_vertices = []

    if dim == 1:
        for i in range(shape[0]-1):
            # a--b

            a = vertex_indices[i]
            b = vertex_indices[i+1]

            el_vertices.append((a, b,))

    elif dim == 2:
        for i in range(shape[0]-1):
            for j in range(shape[1]-1):

                # c--d
                # |  |
                # a--b

                a = vertex_indices[i, j]
                b = vertex_indices[i+1, j]
                c = vertex_indices[i, j+1]
                d = vertex_indices[i+1, j+1]

                if is_tp:
                    el_vertices.append((a, b, c, d))
                else:
                    el_vertices.append((a, b, c))
                    el_vertices.append((d, c, b))

    elif dim == 3:
        for i in range(shape[0]-1):
            for j in range(shape[1]-1):
                for k in range(shape[2]-1):

                    a000 = vertex_indices[i, j, k]
                    a001 = vertex_indices[i, j, k+1]
                    a010 = vertex_indices[i, j+1, k]
                    a011 = vertex_indices[i, j+1, k+1]

                    a100 = vertex_indices[i+1, j, k]
                    a101 = vertex_indices[i+1, j, k+1]
                    a110 = vertex_indices[i+1, j+1, k]
                    a111 = vertex_indices[i+1, j+1, k+1]

                    if is_tp:
                        el_vertices.append(
                                (a000, a001, a010, a011,
                                    a100, a101, a110, a111))

                    else:
                        el_vertices.append((a000, a100, a010, a001))
                        el_vertices.append((a101, a100, a001, a010))
                        el_vertices.append((a101, a011, a010, a001))

                        el_vertices.append((a100, a010, a101, a110))
                        el_vertices.append((a011, a010, a110, a101))
                        el_vertices.append((a011, a111, a101, a110))

    else:
        raise NotImplementedError("box meshes of dimension %d"
                % dim)

    el_vertices = np.array(el_vertices, dtype=np.int32)

    grp = make_group_from_vertices(
            vertices.reshape(dim, -1), el_vertices, order,
            group_factory=group_factory)

    from meshmode.mesh import Mesh
    return Mesh(vertices, [grp],
            is_conforming=True)
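
A minimal usage sketch, assuming the function is importable from `meshmode.mesh.generation`: a 5x9 grid of vertices yields 4*8 cells, each split into two triangles by the simplex branch above::

    import numpy as np
    from meshmode.mesh.generation import generate_box_mesh  # assumed path

    x = np.linspace(0.0, 1.0, 5)
    y = np.linspace(0.0, 2.0, 9)
    mesh = generate_box_mesh((x, y), order=1)
    # 45 vertices, 4 * 8 * 2 = 64 triangular elements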
Exemplo n.º 48
    def get_temporary_decls(self, codegen_state, schedule_index):
        from loopy.kernel.data import AddressSpace

        kernel = codegen_state.kernel

        base_storage_decls = []
        temp_decls = []

        # {{{ declare temporaries

        base_storage_sizes = {}
        base_storage_to_scope = {}
        base_storage_to_align_bytes = {}

        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
        # Getting the temporary variables that are needed for the current
        # sub-kernel.
        from loopy.schedule.tools import (
                temporaries_read_in_subkernel,
                temporaries_written_in_subkernel)
        subkernel = kernel.schedule[schedule_index].kernel_name
        sub_knl_temps = (
                temporaries_read_in_subkernel(kernel, subkernel) |
                temporaries_written_in_subkernel(kernel, subkernel))

        for tv in sorted(
                six.itervalues(kernel.temporary_variables),
                key=lambda tv: tv.name):
            decl_info = tv.decl_info(self.target, index_dtype=kernel.index_dtype)

            if not tv.base_storage:
                for idi in decl_info:
                    # global temp vars are mapped to arguments or global declarations
                    if tv.address_space != AddressSpace.GLOBAL and (
                            tv.name in sub_knl_temps):
                        decl = self.wrap_temporary_decl(
                                self.get_temporary_decl(
                                    codegen_state, schedule_index, tv, idi),
                                tv.address_space)

                        if tv.initializer is not None:
                            assert tv.read_only
                            decl = Initializer(decl, generate_array_literal(
                                codegen_state, tv, tv.initializer))

                        temp_decls.append(decl)

            else:
                assert tv.initializer is None

                offset = 0
                base_storage_sizes.setdefault(tv.base_storage, []).append(
                        tv.nbytes)
                base_storage_to_scope.setdefault(tv.base_storage, []).append(
                        tv.address_space)

                align_size = tv.dtype.itemsize

                from loopy.kernel.array import VectorArrayDimTag
                for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                    if isinstance(dim_tag, VectorArrayDimTag):
                        align_size *= axis_len

                base_storage_to_align_bytes.setdefault(tv.base_storage, []).append(
                        align_size)

                for idi in decl_info:
                    cast_decl = POD(self, idi.dtype, "")
                    temp_var_decl = POD(self, idi.dtype, idi.name)

                    cast_decl = self.wrap_temporary_decl(cast_decl, tv.address_space)
                    temp_var_decl = self.wrap_temporary_decl(
                            temp_var_decl, tv.address_space)

                    if tv._base_storage_access_may_be_aliasing:
                        ptrtype = _ConstPointer
                    else:
                        # The 'restrict' part of this is a complete lie--of course
                        # all these temporaries are aliased. But we're promising to
                        # not use them to shovel data from one representation to the
                        # other. That counts, right?
                        ptrtype = _ConstRestrictPointer

                    cast_decl = ptrtype(cast_decl)
                    temp_var_decl = ptrtype(temp_var_decl)

                    cast_tp, cast_d = cast_decl.get_decl_pair()
                    temp_var_decl = Initializer(
                            temp_var_decl,
                            "(%s %s) (%s + %s)" % (
                                " ".join(cast_tp), cast_d,
                                tv.base_storage,
                                offset))

                    temp_decls.append(temp_var_decl)

                    from pytools import product
                    offset += (
                            idi.dtype.itemsize
                            * product(si for si in idi.shape))

        ecm = self.get_expression_to_code_mapper(codegen_state)

        for bs_name, bs_sizes in sorted(six.iteritems(base_storage_sizes)):
            bs_var_decl = Value("char", bs_name)
            from pytools import single_valued
            bs_var_decl = self.wrap_temporary_decl(
                    bs_var_decl, single_valued(base_storage_to_scope[bs_name]))

            # FIXME: Could try to use isl knowledge to simplify max.
            if all(isinstance(bs, int) for bs in bs_sizes):
                bs_size_max = max(bs_sizes)
            else:
                bs_size_max = p.Max(tuple(bs_sizes))

            bs_var_decl = ArrayOf(bs_var_decl, ecm(bs_size_max))

            alignment = max(base_storage_to_align_bytes[bs_name])
            bs_var_decl = AlignedAttribute(alignment, bs_var_decl)

            base_storage_decls.append(bs_var_decl)

        # }}}

        result = base_storage_decls + temp_decls

        if result:
            result.append(Line())

        return result
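
The base-storage bookkeeping above boils down to this: every temporary aliased onto the same `base_storage` starts at offset 0 of a shared `char` array (offsets only accumulate across the `decl_info` entries of a single temporary), so the backing array only needs to be as large as its biggest user and aligned to the strictest request. A toy model of that sizing rule (hypothetical helper, not loopy code)::

    from pytools import product

    def base_storage_requirements(temps):
        # temps maps a temporary's name to (itemsize, shape, align_bytes).
        sizes = [itemsize * product(si for si in shape)
                 for itemsize, shape, _ in temps.values()]
        aligns = [align for _, _, align in temps.values()]
        return max(sizes), max(aligns)

    size, align = base_storage_requirements({
        "tmp_a": (8, (16, 16), 8),    # 2048 bytes, 8-byte aligned
        "tmp_b": (4, (1024,), 16),    # 4096 bytes, 16-byte aligned
    })
    # size == 4096, align == 16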
Exemplo n.º 49
def adjust_local_temp_var_storage(kernel, device):
    logger.debug("%s: adjust temp var storage" % kernel.name)

    new_temp_vars = {}

    lmem_size = cl_char.usable_local_mem_size(device)
    for temp_var in six.itervalues(kernel.temporary_variables):
        if not temp_var.is_local:
            new_temp_vars[temp_var.name] = \
                    temp_var.copy(storage_shape=temp_var.shape)
            continue

        other_loctemp_nbytes = [
                tv.nbytes
                for tv in six.itervalues(kernel.temporary_variables)
                if tv.is_local and tv.name != temp_var.name]

        storage_shape = temp_var.storage_shape

        if storage_shape is None:
            storage_shape = temp_var.shape

        storage_shape = list(storage_shape)

        # The size of the last dim may be changed below to avoid bank
        # conflicts.
        from pytools import product

        if device.local_mem_type == cl.device_local_mem_type.GLOBAL:
            # FIXME: could try to avoid cache associativity disasters
            new_storage_shape = storage_shape

        elif device.local_mem_type == cl.device_local_mem_type.LOCAL:
            min_mult = cl_char.local_memory_bank_count(device)
            good_incr = None
            new_storage_shape = storage_shape
            min_why_not = None

            for increment in range(storage_shape[-1]//2):

                test_storage_shape = storage_shape[:]
                test_storage_shape[-1] = test_storage_shape[-1] + increment
                new_mult, why_not = cl_char.why_not_local_access_conflict_free(
                        device, temp_var.dtype.itemsize,
                        temp_var.shape, test_storage_shape)

                # will choose smallest increment 'automatically'
                if new_mult < min_mult:
                    new_lmem_use = (sum(other_loctemp_nbytes)
                            + temp_var.dtype.itemsize*product(test_storage_shape))
                    if new_lmem_use < lmem_size:
                        new_storage_shape = test_storage_shape
                        min_mult = new_mult
                        min_why_not = why_not
                        good_incr = increment

            if min_mult != 1:
                from warnings import warn
                from loopy.diagnostic import LoopyAdvisory
                warn("could not find a conflict-free mem layout "
                        "for local variable '%s' "
                        "(currently: %dx conflict, increment: %s, reason: %s)"
                        % (temp_var.name, min_mult, good_incr, min_why_not),
                        LoopyAdvisory)
        else:
            from warnings import warn
            warn("unknown type of local memory")

            new_storage_shape = storage_shape

        new_temp_vars[temp_var.name] = temp_var.copy(storage_shape=new_storage_shape)

    return kernel.copy(temporary_variables=new_temp_vars)
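
The search above pads the trailing dimension of a local temporary until column-wise accesses stop piling onto the same memory bank, while staying inside the local-memory budget. A strongly simplified stand-in (the real criterion is `why_not_local_access_conflict_free`; the coprime test below is only an assumed toy model)::

    from math import gcd
    from pytools import product

    def pick_padding(storage_shape, itemsize, bank_count=32, lmem_size=49152):
        # Pad the last axis until its length is coprime with the bank count
        # (so strided accesses cycle through all banks) and the padded
        # array still fits into local memory.
        for increment in range(storage_shape[-1] // 2 + 1):
            candidate = list(storage_shape[:-1]) + [storage_shape[-1] + increment]
            if (gcd(candidate[-1], bank_count) == 1
                    and itemsize * product(candidate) <= lmem_size):
                return candidate
        return list(storage_shape)

    pick_padding([16, 32], 4)   # -> [16, 33]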
Exemplo n.º 50
    def local_expansions_level_starts(self):
        from pytools import product
        return self._expansions_level_starts(
            lambda nterms: product(self.expansion_shape(nterms)))
Exemplo n.º 51
def generate_box_mesh(axis_coords,
                      order=1,
                      coord_dtype=np.float64,
                      group_factory=None,
                      boundary_tag_to_face=None):
    """Create a semi-structured mesh.

    :param axis_coords: a tuple with a number of entries corresponding
        to the number of dimensions, with each entry a numpy array
        specifying the coordinates to be used along that axis.
    :param group_factory: One of :class:`meshmode.mesh.SimplexElementGroup`
        or :class:`meshmode.mesh.TensorProductElementGroup`.
    :param boundary_tag_to_face: an optional dictionary for tagging boundaries.
        The keys correspond to custom boundary tags, with the values giving
        a list of the faces on which they should be applied in terms of coordinate
        directions (``+x``, ``-x``, ``+y``, ``-y``, ``+z``, ``-z``, ``+w``, ``-w``).

        For example::

            boundary_tag_to_face={"bdry_1": ["+x", "+y"], "bdry_2": ["-x"]}

    .. versionchanged:: 2017.1

        *group_factory* parameter added.

    .. versionchanged:: 2020.1

        *boundary_tag_to_face* parameter added.
    """

    if boundary_tag_to_face is None:
        boundary_tag_to_face = {}

    for iaxis, axc in enumerate(axis_coords):
        if len(axc) < 2:
            raise ValueError("need at least two points along axis %d" %
                             (iaxis + 1))

    dim = len(axis_coords)

    shape = tuple(len(axc) for axc in axis_coords)

    from pytools import product
    nvertices = product(shape)

    vertex_indices = np.arange(nvertices).reshape(*shape, order="F")

    vertices = np.empty((dim, ) + shape, dtype=coord_dtype)
    for idim in range(dim):
        vshape = (shape[idim], ) + (1, ) * idim
        vertices[idim] = axis_coords[idim].reshape(*vshape)

    vertices = vertices.reshape(dim, -1)

    from meshmode.mesh import SimplexElementGroup, TensorProductElementGroup
    if group_factory is None:
        group_factory = SimplexElementGroup

    if issubclass(group_factory, SimplexElementGroup):
        is_tp = False
    elif issubclass(group_factory, TensorProductElementGroup):
        is_tp = True
    else:
        raise ValueError("unsupported value for 'group_factory': %s" %
                         group_factory)

    el_vertices = []

    if dim == 1:
        for i in range(shape[0] - 1):
            # a--b

            a = vertex_indices[i]
            b = vertex_indices[i + 1]

            el_vertices.append((
                a,
                b,
            ))

    elif dim == 2:
        for i in range(shape[0] - 1):
            for j in range(shape[1] - 1):

                # c--d
                # |  |
                # a--b

                a = vertex_indices[i, j]
                b = vertex_indices[i + 1, j]
                c = vertex_indices[i, j + 1]
                d = vertex_indices[i + 1, j + 1]

                if is_tp:
                    el_vertices.append((a, b, c, d))
                else:
                    el_vertices.append((a, b, c))
                    el_vertices.append((d, c, b))

    elif dim == 3:
        for i in range(shape[0] - 1):
            for j in range(shape[1] - 1):
                for k in range(shape[2] - 1):

                    a000 = vertex_indices[i, j, k]
                    a001 = vertex_indices[i, j, k + 1]
                    a010 = vertex_indices[i, j + 1, k]
                    a011 = vertex_indices[i, j + 1, k + 1]

                    a100 = vertex_indices[i + 1, j, k]
                    a101 = vertex_indices[i + 1, j, k + 1]
                    a110 = vertex_indices[i + 1, j + 1, k]
                    a111 = vertex_indices[i + 1, j + 1, k + 1]

                    if is_tp:
                        el_vertices.append(
                            (a000, a001, a010, a011, a100, a101, a110, a111))

                    else:
                        el_vertices.append((a000, a100, a010, a001))
                        el_vertices.append((a101, a100, a001, a010))
                        el_vertices.append((a101, a011, a010, a001))

                        el_vertices.append((a100, a010, a101, a110))
                        el_vertices.append((a011, a010, a110, a101))
                        el_vertices.append((a011, a111, a101, a110))

    else:
        raise NotImplementedError("box meshes of dimension %d" % dim)

    el_vertices = np.array(el_vertices, dtype=np.int32)

    grp = make_group_from_vertices(vertices.reshape(dim, -1),
                                   el_vertices,
                                   order,
                                   group_factory=group_factory)

    # {{{ compute facial adjacency for mesh if there is tag information

    facial_adjacency_groups = None
    face_vertex_indices_to_tags = {}
    boundary_tags = list(boundary_tag_to_face.keys())
    axes = ["x", "y", "z", "w"]

    if boundary_tags:
        vert_index_to_tuple = {
            vertex_indices[itup]: itup
            for itup in np.ndindex(shape)
        }

    for tag_idx, tag in enumerate(boundary_tags):
        # Need to map the correct face vertices to the boundary tags
        for face in boundary_tag_to_face[tag]:
            if len(face) != 2:
                raise ValueError("face identifier '%s' does not "
                                 "consist of exactly two characters" % face)

            side, axis = face
            try:
                axis = axes.index(axis)
            except ValueError:
                raise ValueError("unrecognized axis in face identifier '%s'" %
                                 face)
            if axis >= dim:
                raise ValueError(
                    "axis in face identifier '%s' does not exist in %dD" %
                    (face, dim))

            if side == "-":
                vert_crit = 0
            elif side == "+":
                vert_crit = shape[axis] - 1
            else:
                raise ValueError(
                    "first character of face identifier '%s' is not"
                    "'+' or '-'" % face)

            for ielem in range(0, grp.nelements):
                for ref_fvi in grp.face_vertex_indices():
                    fvi = grp.vertex_indices[ielem, ref_fvi]
                    fvi_tuples = [vert_index_to_tuple[i] for i in fvi]

                    if all(fvi_tuple[axis] == vert_crit
                           for fvi_tuple in fvi_tuples):
                        key = frozenset(fvi)
                        face_vertex_indices_to_tags.setdefault(key,
                                                               []).append(tag)

    if boundary_tags:
        from meshmode.mesh import (_compute_facial_adjacency_from_vertices,
                                   BTAG_ALL, BTAG_REALLY_ALL)
        boundary_tags.extend([BTAG_ALL, BTAG_REALLY_ALL])
        facial_adjacency_groups = _compute_facial_adjacency_from_vertices(
            [grp], boundary_tags, np.int32, np.int8,
            face_vertex_indices_to_tags)
    else:
        facial_adjacency_groups = None

    # }}}

    from meshmode.mesh import Mesh
    return Mesh(vertices, [grp],
                facial_adjacency_groups=facial_adjacency_groups,
                is_conforming=True,
                boundary_tags=boundary_tags)
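
A short usage sketch for the boundary-tagging variant (same assumed import path as before), tagging the left and right faces of a unit square::

    import numpy as np
    from meshmode.mesh.generation import generate_box_mesh  # assumed path

    x = np.linspace(0.0, 1.0, 4)
    y = np.linspace(0.0, 1.0, 4)
    mesh = generate_box_mesh(
        (x, y), order=1,
        boundary_tag_to_face={"inflow": ["-x"], "outflow": ["+x"]})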
Exemplo n.º 52
    def get_temporary_decls(self, codegen_state, schedule_index):
        from loopy.kernel.data import temp_var_scope

        kernel = codegen_state.kernel

        base_storage_decls = []
        temp_decls = []

        # {{{ declare temporaries

        base_storage_sizes = {}
        base_storage_to_scope = {}
        base_storage_to_align_bytes = {}

        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line

        for tv in sorted(six.itervalues(kernel.temporary_variables),
                         key=lambda tv: tv.name):
            decl_info = tv.decl_info(self.target,
                                     index_dtype=kernel.index_dtype)

            if not tv.base_storage:
                for idi in decl_info:
                    # global temp vars are mapped to arguments or global declarations
                    if tv.scope != temp_var_scope.GLOBAL:
                        decl = self.wrap_temporary_decl(
                            self.get_temporary_decl(kernel, schedule_index, tv,
                                                    idi), tv.scope)

                        if tv.initializer is not None:
                            decl = Initializer(
                                decl,
                                generate_array_literal(codegen_state, tv,
                                                       tv.initializer))

                        temp_decls.append(decl)

            else:
                assert tv.initializer is None

                offset = 0
                base_storage_sizes.setdefault(tv.base_storage,
                                              []).append(tv.nbytes)
                base_storage_to_scope.setdefault(tv.base_storage,
                                                 []).append(tv.scope)

                align_size = tv.dtype.itemsize

                from loopy.kernel.array import VectorArrayDimTag
                for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                    if isinstance(dim_tag, VectorArrayDimTag):
                        align_size *= axis_len

                base_storage_to_align_bytes.setdefault(tv.base_storage,
                                                       []).append(align_size)

                for idi in decl_info:
                    cast_decl = POD(self, idi.dtype, "")
                    temp_var_decl = POD(self, idi.dtype, idi.name)

                    cast_decl = self.wrap_temporary_decl(cast_decl, tv.scope)
                    temp_var_decl = self.wrap_temporary_decl(
                        temp_var_decl, tv.scope)

                    # The 'restrict' part of this is a complete lie--of course
                    # all these temporaries are aliased. But we're promising to
                    # not use them to shovel data from one representation to the
                    # other. That counts, right?

                    cast_decl = _ConstRestrictPointer(cast_decl)
                    temp_var_decl = _ConstRestrictPointer(temp_var_decl)

                    cast_tp, cast_d = cast_decl.get_decl_pair()
                    temp_var_decl = Initializer(
                        temp_var_decl, "(%s %s) (%s + %s)" %
                        (" ".join(cast_tp), cast_d, tv.base_storage, offset))

                    temp_decls.append(temp_var_decl)

                    from pytools import product
                    offset += (idi.dtype.itemsize *
                               product(si for si in idi.shape))

        for bs_name, bs_sizes in sorted(six.iteritems(base_storage_sizes)):
            bs_var_decl = Value("char", bs_name)
            from pytools import single_valued
            bs_var_decl = self.wrap_temporary_decl(
                bs_var_decl, single_valued(base_storage_to_scope[bs_name]))
            bs_var_decl = ArrayOf(bs_var_decl, max(bs_sizes))

            alignment = max(base_storage_to_align_bytes[bs_name])
            bs_var_decl = AlignedAttribute(alignment, bs_var_decl)

            base_storage_decls.append(bs_var_decl)

        # }}}

        result = base_storage_decls + temp_decls

        if result:
            result.append(Line())

        return result
Exemplo n.º 53
    def get_temporary_decls(self, codegen_state, schedule_index):
        from loopy.kernel.data import AddressSpace

        kernel = codegen_state.kernel

        base_storage_decls = []
        temp_decls = []

        # {{{ declare temporaries

        base_storage_sizes = {}
        base_storage_to_scope = {}
        base_storage_to_align_bytes = {}

        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
        # Getting the temporary variables that are needed for the current
        # sub-kernel.
        from loopy.schedule.tools import (
                temporaries_read_in_subkernel,
                temporaries_written_in_subkernel)
        subkernel = kernel.schedule[schedule_index].kernel_name
        sub_knl_temps = (
                temporaries_read_in_subkernel(kernel, subkernel) |
                temporaries_written_in_subkernel(kernel, subkernel))

        for tv in sorted(
                six.itervalues(kernel.temporary_variables),
                key=lambda tv: tv.name):
            decl_info = tv.decl_info(self.target, index_dtype=kernel.index_dtype)

            if not tv.base_storage:
                for idi in decl_info:
                    # global temp vars are mapped to arguments or global declarations
                    if tv.address_space != AddressSpace.GLOBAL and (
                            tv.name in sub_knl_temps):
                        decl = self.wrap_temporary_decl(
                                self.get_temporary_decl(
                                    codegen_state, schedule_index, tv, idi),
                                tv.address_space)

                        if tv.initializer is not None:
                            assert tv.read_only
                            decl = Initializer(decl, generate_array_literal(
                                codegen_state, tv, tv.initializer))

                        temp_decls.append(decl)

            else:
                assert tv.initializer is None

                offset = 0
                base_storage_sizes.setdefault(tv.base_storage, []).append(
                        tv.nbytes)
                base_storage_to_scope.setdefault(tv.base_storage, []).append(
                        tv.address_space)

                align_size = tv.dtype.itemsize

                from loopy.kernel.array import VectorArrayDimTag
                for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                    if isinstance(dim_tag, VectorArrayDimTag):
                        align_size *= axis_len

                base_storage_to_align_bytes.setdefault(tv.base_storage, []).append(
                        align_size)

                for idi in decl_info:
                    cast_decl = POD(self, idi.dtype, "")
                    temp_var_decl = POD(self, idi.dtype, idi.name)

                    cast_decl = self.wrap_temporary_decl(cast_decl, tv.address_space)
                    temp_var_decl = self.wrap_temporary_decl(
                            temp_var_decl, tv.address_space)

                    if tv._base_storage_access_may_be_aliasing:
                        ptrtype = _ConstPointer
                    else:
                        # The 'restrict' part of this is a complete lie--of course
                        # all these temporaries are aliased. But we're promising to
                        # not use them to shovel data from one representation to the
                        # other. That counts, right?
                        ptrtype = _ConstRestrictPointer

                    cast_decl = ptrtype(cast_decl)
                    temp_var_decl = ptrtype(temp_var_decl)

                    cast_tp, cast_d = cast_decl.get_decl_pair()
                    temp_var_decl = Initializer(
                            temp_var_decl,
                            "(%s %s) (%s + %s)" % (
                                " ".join(cast_tp), cast_d,
                                tv.base_storage,
                                offset))

                    temp_decls.append(temp_var_decl)

                    from pytools import product
                    offset += (
                            idi.dtype.itemsize
                            * product(si for si in idi.shape))

        ecm = self.get_expression_to_code_mapper(codegen_state)

        for bs_name, bs_sizes in sorted(six.iteritems(base_storage_sizes)):
            bs_var_decl = Value("char", bs_name)
            from pytools import single_valued
            bs_var_decl = self.wrap_temporary_decl(
                    bs_var_decl, single_valued(base_storage_to_scope[bs_name]))

            # FIXME: Could try to use isl knowledge to simplify max.
            if all(isinstance(bs, int) for bs in bs_sizes):
                bs_size_max = max(bs_sizes)
            else:
                bs_size_max = p.Max(tuple(bs_sizes))

            bs_var_decl = ArrayOf(bs_var_decl, ecm(bs_size_max))

            alignment = max(base_storage_to_align_bytes[bs_name])
            bs_var_decl = AlignedAttribute(alignment, bs_var_decl)

            base_storage_decls.append(bs_var_decl)

        # }}}

        result = base_storage_decls + temp_decls

        if result:
            result.append(Line())

        return result
Exemplo n.º 54
    def _apply_single_qubit_ptm(self, qubit, ptm):
        # noinspection PyUnresolvedReferences
        """Apply a one-qubit Pauli transfer matrix to qubit bit.

        Parameters
        ----------
        qubit: int
            Qubit index
        ptm: array-like
            A PTM in the basis of a qubit.
        basis_out: quantumsim.bases.PauliBasis or None
            If provided, will convert qubit basis to specified
            after the PTM application.
        """
        new_shape = list(self._data.shape)
        self._validate_qubit(qubit, 'bit')

        # TODO Refactor to use self._validate_ptm
        if len(ptm.shape) != 2:
            raise ValueError("`ptm` must be a 2D array, got {}D".format(
                len(ptm.shape)))

        dim_bit_out, dim_bit_in = ptm.shape
        assert new_shape[qubit] == dim_bit_in
        new_shape[qubit] = dim_bit_out
        new_size = pytools.product(new_shape)
        new_size_bytes = new_size * 8

        if self._work_data.gpudata.size < new_size_bytes:
            # reallocate
            self._work_data.gpudata.free()
            self._work_data = ga.empty(new_shape, np.float64)
            self._work_data.gpudata.size = self._work_data.nbytes
        else:
            # reallocation not required,
            # reshape but reuse allocation
            self._work_data = ga.GPUArray(
                shape=new_shape,
                dtype=np.float64,
                gpudata=self._work_data.gpudata,
            )

        ptm_gpu = self._cached_gpuarray(ptm)

        dint = min(64, self._data.size // dim_bit_in)
        block = (1, dim_bit_out, dint)
        blocksize = dim_bit_out * dint
        grid_size = max(1, (new_size - 1) // blocksize + 1)
        grid = (grid_size, 1, 1)

        dim_z = pytools.product(self._data.shape[qubit + 1:])
        dim_y = pytools.product(self._data.shape[:qubit])
        dim_rho = new_size  # self.data.size

        _two_qubit_general_ptm.prepared_call(grid,
                                             block,
                                             self._data.gpudata,
                                             self._work_data.gpudata,
                                             ptm_gpu.gpudata,
                                             1,
                                             dim_bit_in,
                                             dim_z,
                                             dim_y,
                                             dim_rho,
                                             shared_size=8 *
                                             (ptm.size + blocksize))

        self._data, self._work_data = self._work_data, self._data
Exemplo n.º 55
    def grid_point_count(self):
        """Return the total number of grid points."""
        return pytools.product(self.grid_point_counts())
Exemplo n.º 56
    def map_product(self, expr):
        from pytools import product
        return product(self.rec(child) for child in expr.children)
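
`map_product` is the hook an expression mapper uses to evaluate a product node: multiply the recursively evaluated children. A self-contained stand-in to show the pattern (not pymbolic's actual class hierarchy)::

    from pytools import product

    class ProductNode:
        def __init__(self, *children):
            self.children = children

    class Evaluator:
        # Numbers evaluate to themselves; product nodes evaluate to the
        # product of their recursively evaluated children.
        def rec(self, expr):
            if isinstance(expr, ProductNode):
                return self.map_product(expr)
            return expr

        def map_product(self, expr):
            return product(self.rec(child) for child in expr.children)

    assert Evaluator().rec(ProductNode(2, ProductNode(3, 4))) == 24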
Exemplo n.º 57
    def diagonal(self, *, get_data=True, target_array=None, flatten=True):
        """Obtain the diagonal of the density matrix.

        Parameters
        ----------
        target_array : None or pycuda.gpuarray.array
            An already-allocated GPU array to which the data will be copied.
            If `None`, make a new GPU array.
        get_data : boolean
            Whether the data should be copied from the GPU.
        flatten : boolean
            If True, return the diagonal as a flat 1D array; otherwise
            reshape it to one axis per qubit.
        """
        diag_bases = [pb.computational_subbasis() for pb in self.bases]
        diag_shape = [db.dim_pauli for db in diag_bases]
        diag_size = pytools.product(diag_shape)

        if target_array is None:
            if self._work_data.gpudata.size < diag_size * 8:
                self._work_data.gpudata.free()
                self._work_data = ga.empty(diag_shape, np.float64)
                self._work_data.gpudata.size = self._work_data.nbytes
            target_array = self._work_data
        else:
            if target_array.size < diag_size:
                raise ValueError(
                    "Size of `target_gpu_array` is too small ({}).\n"
                    "Should be at least {}.".format(target_array.size,
                                                    diag_size))

        idx = [[
            pb.computational_basis_indices[i] for i in range(pb.dim_hilbert)
            if pb.computational_basis_indices[i] is not None
        ] for pb in self.bases]

        idx_j = np.array(list(pytools.flatten(idx))).astype(np.uint32)
        idx_i = np.cumsum([0] + [len(i) for i in idx][:-1]).astype(np.uint32)

        xshape = np.array(self._data.shape, np.uint32)
        yshape = np.array(diag_shape, np.uint32)

        xshape_gpu = self._cached_gpuarray(xshape)
        yshape_gpu = self._cached_gpuarray(yshape)

        idx_i_gpu = self._cached_gpuarray(idx_i)
        idx_j_gpu = self._cached_gpuarray(idx_j)

        block = (2**8, 1, 1)
        grid = (max(1, (diag_size - 1) // 2**8 + 1), 1, 1)

        if len(yshape) == 0:
            # Degenerate case (no qubit axes), but it should still be handled.
            target_array.set(self._data.get())
        else:
            _multitake.prepared_call(grid, block, self._data.gpudata,
                                     target_array.gpudata, idx_i_gpu.gpudata,
                                     idx_j_gpu.gpudata,
                                     xshape_gpu.gpudata, yshape_gpu.gpudata,
                                     np.uint32(len(yshape)))

        if get_data:
            if flatten:
                return target_array.get().ravel()[:diag_size]
            else:
                return (
                    target_array.get().ravel()[:diag_size].reshape(diag_shape))
        else:
            return ga.GPUArray(shape=diag_shape,
                               gpudata=target_array.gpudata,
                               dtype=np.float64)