def test_dist_sizes(self):
    """Check the per-rank shapes of a (2, 3, 4) distribution on 4 engines.

    The shapes are always computed; the comparison itself only runs when
    the context reports exactly four engines.
    """
    distribution = Distribution(self.context, (2, 3, 4),
                                dist=('n', 'b', 'c'))
    per_rank = distribution.get_dim_data_per_rank()
    shapes = metadata_utils.shapes_from_dim_data_per_rank(per_rank)

    # The expected values below are only valid for a 4-engine context.
    if self.context.nengines != 4:
        return

    expected = [(2, 2, 2), (2, 2, 2), (2, 1, 2), (2, 1, 2)]
    self.assertEqual(shapes, expected)
示例#2
0
def create_complex_plane(context, resolution, dist, re_ax, im_ax):
    """Build a complex64 DistArray and fill it with complex-plane points.

    Parameters
    ----------
    context : DistArray Context
        Used to allocate the array and to run the fill kernel.
    resolution : 2-tuple
        Number of points along the Re and Im axes; also the array shape.
    dist : 2-element sequence or dict
        Passed as the ``dist`` argument of the Distribution.
    re_ax : 2-tuple
        The (lower, upper) range of the Re axis.
    im_ax : 2-tuple
        The (lower, upper) range of the Im axis.

    Returns
    -------
    The DistArray that ``fill_complex_plane`` was applied to.
    """
    import numpy as np
    from kernel import fill_complex_plane

    # Allocate an uninitialized array with the requested layout.
    n_re, n_im = resolution[0], resolution[1]
    plane_distribution = Distribution(context, (n_re, n_im), dist=dist)
    plane = context.empty(plane_distribution, dtype=np.complex64)

    # The kernel receives the array's key rather than the array itself.
    kernel_args = (plane.key, re_ax, im_ax, resolution)
    context.apply(fill_complex_plane, kernel_args)
    return plane
示例#3
0
 def setUpClass(cls):
     """Prepare the plotting module and a (64, 64) array of ones."""
     # Per the original note: import_or_skip is expected to raise a
     # skipTest when the import fails (probably matplotlib is missing).
     plotting_module = import_or_skip("distarray.plotting")
     cls.plt = plotting_module

     super(TestPlotting, cls).setUpClass()

     # Built after the parent setup, since it reads cls.context.
     distribution = Distribution(cls.context, (64, 64))
     cls.da = distribution
     cls.arr = cls.context.ones(distribution)
示例#4
0
def calc_pi(n):
    """Estimate pi from `n` random (x, y) samples held in distributed arrays.

    Returns four times the fraction of samples whose hypotenuse is below 1.
    """
    sample_dist = Distribution(context=context, shape=(n, ))
    xs = random.rand(sample_dist)
    ys = random.rand(sample_dist)

    # Boolean mask of the samples with hypot(x, y) < 1.
    inside = hypot(xs, ys) < 1
    hits = inside.sum().toarray()
    return 4 * hits / n
 def test_c_size(self):
     """A 'c' dimension of size 42 on a grid of 2 yields two (21,) shapes."""
     global_dim_data = (
         {'dist_type': 'c',
          'size': 42,
          'proc_grid_size': 2,
          'proc_grid_rank': 0,
          'start': 0},
     )
     distribution = Distribution.from_global_dim_data(self.context,
                                                      global_dim_data)
     per_rank = distribution.get_dim_data_per_rank()
     actual = metadata_utils.shapes_from_dim_data_per_rank(per_rank)
     self.assertEqual(actual, [(21,), (21,)])
示例#6
0
    def __load_data_from_hdf5(self, data_list):
        """Replace ``data.data`` of every entry with a DistArray from HDF5.

        For each entry the distribution is derived from the pattern of its
        plugin data, and the array is loaded from the entry's backing file
        using the entry's name as the HDF5 key.
        """
        for entry in data_list:
            source_path = entry.backing_file.filename
            pattern = entry._get_plugin_data().get_pattern()
            dist_spec = self.__calculate_distribution(pattern)
            layout = Distribution(self.context, entry.get_shape(),
                                  dist=dist_spec)
            entry.data = self.context.load_hdf5(
                source_path, distribution=layout, key=entry.name)
示例#7
0
    def __redistribute_data(self, data_list):
        """Redistribute every dataset whose distribution pattern has changed.

        For each dataset in `data_list` (a mapping; only its values are
        used) the distribution history is looked up by name.  When the
        first two history entries differ, the data is gathered into a local
        array and handed back to the context together with the most recent
        pattern.

        The comparison is made per dataset, inside the loop.  Previously it
        ran once after the loop, so only the last dataset was ever
        considered and an empty `data_list` failed on an unbound local.
        """
        for data in data_list.values():
            patterns = self.__get_distribution_history(data.get_name())

            if patterns[0] == patterns[1]:
                # Same pattern before and after: nothing to redistribute.
                continue

            # *** temporarily creating ndarray
            # distarray (create empty dist array and populate?)
            temp = data.data.toarray()
            # NOTE(review): the Distribution object is built but not passed
            # on (marked "currently redundant" by the original author).  It
            # is kept so that any checks done by its constructor still run;
            # confirm whether fromarray() should receive it instead.
            distribution = \
                Distribution(self.context, data.get_shape(), patterns[-1])
            data.data = self.context.fromarray(temp, patterns[-1])
示例#8
0
def load_hdf5_distarray(context, filename, key, dist):
    """Load one dataset of an HDF5 file into a DistArray and return it.

    Parameters
    ----------
    context : DistArray Context
        Used to build the distribution and to load the file.
    filename : str
        Path of the HDF5 file; converted to an absolute path before use.
    key : str
        Name of the dataset inside the file.
    dist : object
        Passed as the ``dist`` argument of the Distribution.

    Progress messages are printed before each step and after loading.
    """
    # load_hdf5() needs the full path, not a relative one.
    full_path = os.path.abspath(filename)

    print('Getting array shape...')
    shape = get_hdf5_dataset_shape(full_path, key)

    print('Creating distribution...')
    layout = Distribution(context, shape, dist=dist)

    print('Loading HDF5 file...')
    loaded = context.load_hdf5(filename=full_path,
                               distribution=layout,
                               key=key)
    print('Loaded.')
    return loaded
示例#9
0
 def __create_out_data(self, out_data):
     """Give every output dataset a zero-filled int32 DistArray.

     `out_data` is a mapping; only its values are used.  The distribution
     of each array is derived from the pattern of the dataset's plugin data.
     """
     for entry in out_data.values():
         pattern = entry._get_plugin_data().get_pattern()
         dist_spec = self.__calculate_distribution(pattern)
         layout = Distribution(self.context, entry.get_shape(), dist_spec)
         entry.data = self.context.zeros(layout, dtype=np.int32)
示例#10
0
If it exits cleanly, then everything is fine.  If it exits with an error code,
then there's a problem.

"""

from __future__ import print_function
from distarray.globalapi import Context, Distribution
import numpy as np

# Create the Context (kind 'MPI') as soon as the module is loaded.
c = Context(kind='MPI')


def fmt(s):
    """Left-align `s` in a 25-character field padded with '.', then add ':'."""
    return "{:.<25s}:".format(s)


print(fmt("Context"), c)
print(fmt("targets"), c.targets)

if __name__ == '__main__':
    factor = 2

    # One hundred elements per target.
    size = 100 * len(c.targets)
    print(fmt("size"), size)

    dist = Distribution(c, (size,))
    print(fmt("Distribution"), dist)

    ones = c.ones(dist, dtype=np.int64)
    print(fmt("DistArray"), ones)

    scaled = ones * factor
    print(fmt("DistArray"), scaled)

    # An array of ones scaled by `factor` must sum to size * factor.
    total = scaled.sum().tondarray()
    expected = size * factor
    print(fmt("sum"), total)
    print(fmt("sum == factor * size"), total == expected)
    assert total == expected
示例#11
0
def create_distribution_plot_and_documentation(context, params):
    """Plot how an array is distributed and print matching documentation.

    Parameters
    ----------
    context : DistArray Context
        Used to create the array and to query the engines.
    params : dict
        Required keys: 'title', 'labels' (indexed at 0 and 1) and 'shape'
        (1, 2 or 3 entries).  Optional keys: 'grid_shape', 'text', 'dist',
        'dimdata', 'filename' and 'skip'.  One of 'dist' or 'dimdata' must
        be given; 'dist' wins when both are present.  A true 'skip' makes
        the function return without doing anything.

    Raises
    ------
    ValueError
        If neither 'dist' nor 'dimdata' is given, or if the shape does not
        have 1, 2 or 3 dimensions.
    """
    def shape_text(shape):
        """Return the entries of `shape` joined by ' X '."""
        # A 1-D shape is shown as 1 X N so there are always two factors.
        dims = (1, shape[0]) if len(shape) == 1 else shape
        return ' X '.join(['%d' % (dim) for dim in dims])

    title, labels, shape = params['title'], params['labels'], params['shape']
    grid_shape = params.get('grid_shape', None)
    text = params.get('text', None)
    dist = params.get('dist', None)
    dimdata = params.get('dimdata', None)
    filename = params.get('filename', None)

    if params.get('skip', False):
        return

    # Build the distribution from `dist` when present, else from `dimdata`.
    if dist is not None:
        distribution = Distribution(context,
                                    shape,
                                    dist=dist,
                                    grid_shape=grid_shape)
    elif dimdata is not None:
        distribution = Distribution.from_global_dim_data(context, dimdata)
    else:
        raise ValueError('Must provide either dist or dimdata.')
    array = context.empty(distribution)

    # Enumerate the global indices in row-major order.
    ndim = len(shape)
    if ndim == 1:
        indices = range(shape[0])
    elif ndim == 2:
        indices = ((row, col)
                   for row in range(shape[0])
                   for col in range(shape[1]))
    elif ndim == 3:
        indices = ((i, j, k)
                   for i in range(shape[0])
                   for j in range(shape[1])
                   for k in range(shape[2]))
    else:
        # TODO: Even better would be to generalize this to any dimensions.
        raise ValueError('Array must be 1, 2, or 3 dimensional.')

    # Element-by-element fill with 0.0, 1.0, 2.0, ...; slow, but not a
    # real problem here.
    for count, index in enumerate(indices):
        array[index] = float(count)

    # Get all process grid coordinates.
    # This is duplicating work in print_array_documentation(),
    # but it is needed for the local array plots.
    def _get_process_coords(local_arr):
        return local_arr.cart_coords

    process_coords = context.apply(_get_process_coords, (array.key, ),
                                   targets=array.targets)

    # Plot title and axis labels; 1-D plots get an extra newline because
    # they are cramped.
    plot_title = title + ' ' + shape_text(shape) + (
        '\n\n' if ndim == 1 else '\n')
    xlabel = 'Axis 1, %s' % (labels[1])
    ylabel = 'Axis 0, %s' % (labels[0])

    # Documentation title and text description.
    doc_title = title
    dist_text = ' X '.join(["'%s'" % (label) for label in labels])
    # 'an' before a title starting with a vowel, 'a' otherwise.
    article = 'an' if title[0] in 'aeiouAEIOU' else 'a'
    doc_text = 'A (%s) array, with %s %s (%s) distribution over a (%s) process grid.' % (
        shape_text(shape), article, title, dist_text,
        shape_text(array.grid_shape))
    if text is not None:
        doc_text = doc_text + "\n\n" + text

    # The local plot file is the global one with '_local' before the
    # extension.
    global_plot_filename = filename
    if global_plot_filename is None:
        local_plot_filename = None
    else:
        root, ext = os.path.splitext(global_plot_filename)
        local_plot_filename = root + '_local' + ext

    if ndim in (1, 2):
        plotting.plot_array_distribution(
            array,
            process_coords,
            title=plot_title,
            xlabel=xlabel,
            ylabel=ylabel,
            legend=True,
            global_plot_filename=global_plot_filename,
            local_plot_filename=local_plot_filename)
    else:
        # Not plottable, avoid writing links to missing plots.
        global_plot_filename = None
        local_plot_filename = None

    print_array_documentation(context,
                              array,
                              title=doc_title,
                              text=doc_text,
                              global_plot_filename=global_plot_filename,
                              local_plot_filename=local_plot_filename)
def create_distribution_plot_and_documentation(context, params):
    """Create a distribution plot for an array and print its documentation.

    `params` is a dict.  The keys 'title', 'labels' and 'shape' are
    required; 'grid_shape', 'text', 'dist', 'dimdata', 'filename' and
    'skip' are optional.  The array layout comes from 'dist' if it is
    given, otherwise from 'dimdata'; a ValueError is raised when both are
    missing or when the shape is not 1, 2 or 3 dimensional.  Nothing is
    done when 'skip' is true.
    """

    def shape_text(shape):
        """Format `shape` as its entries separated by ' X '."""
        # Always want to display at least N X M.
        if len(shape) == 1:
            return ' X '.join(['%d' % (s) for s in (1, shape[0])])
        return ' X '.join(['%d' % (s) for s in shape])

    def _global_indices(dims):
        """Return the indices of a 1-, 2- or 3-D array in row-major order."""
        rank = len(dims)
        if rank == 1:
            return range(dims[0])
        if rank == 2:
            return ((row, col)
                    for row in range(dims[0])
                    for col in range(dims[1]))
        if rank == 3:
            return ((i, j, k)
                    for i in range(dims[0])
                    for j in range(dims[1])
                    for k in range(dims[2]))
        # TODO: Even better would be to generalize this to any dimensions.
        raise ValueError('Array must be 1, 2, or 3 dimensional.')

    title = params['title']
    labels = params['labels']
    shape = params['shape']
    grid_shape = params.get('grid_shape', None)
    text = params.get('text', None)
    dist = params.get('dist', None)
    dimdata = params.get('dimdata', None)
    filename = params.get('filename', None)
    skip = params.get('skip', False)

    if skip:
        return

    # Create array, either from dist or dimdata.
    if dist is None and dimdata is None:
        raise ValueError('Must provide either dist or dimdata.')
    if dist is not None:
        distribution = Distribution(context, shape, dist=dist,
                                    grid_shape=grid_shape)
    else:
        distribution = Distribution.from_global_dim_data(context, dimdata)
    array = context.empty(distribution)

    # Fill the array with 0.0, 1.0, 2.0, ... one element at a time.
    # This is slow but not a real problem here.
    value = 0.0
    for index in _global_indices(shape):
        array[index] = value
        value += 1.0

    # Get all process grid coordinates.
    # This is duplicating work in print_array_documentation(),
    # but it is needed for the local array plots.
    def _get_process_coords(local_arr):
        return local_arr.cart_coords
    process_coords = context.apply(_get_process_coords,
                                   (array.key,),
                                   targets=array.targets)

    is_one_dimensional = len(shape) == 1

    # Plot title and axis labels.
    plot_title = title + ' ' + shape_text(shape) + '\n'
    if is_one_dimensional:
        # add more space for cramped plot.
        plot_title = plot_title + '\n'
    xlabel = 'Axis 1, %s' % (labels[1])
    ylabel = 'Axis 0, %s' % (labels[0])

    # Documentation title and text description.
    quoted_labels = ["'%s'" % (label) for label in labels]
    dist_text = ' X '.join(quoted_labels)
    # Choose 'a' vs 'an' appropriately.
    starts_with_vowel = title[0] in 'aeiouAEIOU'
    article = 'an' if starts_with_vowel else 'a'
    doc_text = 'A (%s) array, with %s %s (%s) distribution over a (%s) process grid.' % (
        shape_text(shape), article, title, dist_text, shape_text(array.grid_shape))
    if text is not None:
        doc_text = doc_text + "\n\n" + text

    # Filenames for array plots: the local plot file gets a '_local'
    # suffix in front of the extension.
    local_plot_filename = None
    if filename is not None:
        root, ext = os.path.splitext(filename)
        local_plot_filename = root + '_local' + ext
    global_plot_filename = filename

    # Create plot.
    plottable = len(shape) in [1, 2]
    if plottable:
        plotting.plot_array_distribution(
            array,
            process_coords,
            title=plot_title,
            xlabel=xlabel,
            ylabel=ylabel,
            legend=True,
            global_plot_filename=global_plot_filename,
            local_plot_filename=local_plot_filename)
    else:
        # Not plottable, avoid writing links to missing plots.
        global_plot_filename = local_plot_filename = None

    # Print documentation.
    print_array_documentation(
        context,
        array,
        title=title,
        text=doc_text,
        global_plot_filename=global_plot_filename,
        local_plot_filename=local_plot_filename)