Пример #1
0
 def get_block_addresses(self, grid: ArrayGrid):
     """
     Assign every entry of a grid to the resource key of a cluster node.
     :param grid: The ArrayGrid whose entries are mapped to nodes.
     :return: A dict from grid entry to node resource key. The dict stays
         empty when the scheduler is neither a BlockCyclicScheduler nor a
         TaskScheduler.
     """
     def resource_key(node_info):
         # Exactly one resource name per node is expected to contain "node".
         matches = [name for name in node_info["Resources"].keys()
                    if "node" in name]
         assert len(matches) == 1
         return matches[0]

     placement: dict = {}
     if isinstance(self.scheduler, BlockCyclicScheduler):
         block_cyclic: BlockCyclicScheduler = self.scheduler
         for entry in grid.get_entry_iterator():
             # The scheduler decides which cluster entry owns this block.
             cluster_entry = block_cyclic.get_cluster_entry(entry)
             placement[entry] = resource_key(
                 block_cyclic.cluster_grid[cluster_entry])
     elif isinstance(self.scheduler, TaskScheduler):
         # No placement is prescribed here, so cycle through the nodes.
         candidates = self.nodes()
         cursor = 0
         for entry in grid.get_entry_iterator():
             placement[entry] = resource_key(candidates[cursor])
             cursor = (cursor + 1) % len(candidates)
     return placement
Пример #2
0
 def _group_index_lists_by_block(self, dst_slice_tuples,
                                 src_grid: ArrayGrid, dst_index_list,
                                 src_index_list):
     """
     Group paired destination/source indices by the source block holding them.
     :param dst_slice_tuples: Slice tuples of the destination selection
         (presumably one (start, stop) pair per axis -- confirm with callers).
     :param src_grid: Grid describing the blocks of the source array.
     :param dst_index_list: Destination indices, paired by position with
         src_index_list.
     :param src_index_list: Source indices.
     :return: A dict from source grid entry to a list of
         (destination offset, source offset) pairs. Offsets are taken
         relative to the first row of the transposed slice tuples and are
         cast to the selected unsigned integer type. Entries without any
         matching index are omitted.
     :raises Exception: If no supported unsigned type can hold the indices.
     """
     # TODO(hme): Keep this function here until it's needed for greater support of
     #  selection/assignment operations.
     dst_bounds = np.array(dst_slice_tuples).T
     dst_indices = np.array(dst_index_list)
     src_indices = np.array(src_index_list)

     # Indices may be sent over the network, so use the narrowest unsigned
     # type whose range covers both the block shape and the slice bounds.
     candidates = [(2**8, np.uint8), (2**16, np.uint16),
                   (2**32, np.uint32), (2**64, np.uint64)]
     index_type = next(
         (candidate for bound, candidate in candidates
          if np.all(np.array(src_grid.block_shape) < bound)
          and np.all(dst_bounds[1] < bound)),
         None)
     if index_type is None:
         raise Exception("Unable to encode block indices, blocks are too large.")

     grouped = {}
     for entry in src_grid.get_entry_iterator():
         block_bounds = np.array(src_grid.get_slice_tuples(entry)).T
         matched = []
         for position in range(src_indices.shape[0]):
             src_index = src_indices[position]
             dst_index = dst_indices[position]
             inside = np.all((block_bounds[0] <= src_index)
                             & (src_index < block_bounds[1]))
             if not inside:
                 continue
             matched.append(((dst_index - dst_bounds[0]).astype(index_type),
                             (src_index - block_bounds[0]).astype(index_type)))
         if len(matched) > 0:
             grouped[entry] = matched
     return grouped
Пример #3
0
 def from_oid(cls, oid, shape, dtype, system):
     """
     Wrap an existing object id in a BlockArray whose block shape equals its shape.
     :param oid: Object id assigned to the block.
     :param shape: Shape of the array; also used as the block shape.
     :param dtype: A type exposing __name__, used as the grid's dtype name.
     :param system: System handed to the BlockArray.
     :return: The BlockArray holding oid.
     """
     whole_array_grid = ArrayGrid(shape, shape, dtype.__name__)
     wrapped = BlockArray(whole_array_grid, system)
     for position, entry in enumerate(whole_array_grid.get_entry_iterator()):
         # The grid is expected to yield a single entry.
         assert position == 0
         wrapped.blocks[entry].oid = oid
     return wrapped
Пример #4
0
    def from_np(cls, arr, block_shape, copy, system):
        """
        Build a SparseBlockArray from an array, one CSR matrix per block.
        :param arr: Source array; it is sliced once per grid entry.
        :param block_shape: Block shape of the resulting grid.
        :param copy: Not used by this implementation.
        :param system: System whose put() stores each block.
        :return: The populated SparseBlockArray.
        """
        dtype_name = str(arr.dtype)
        grid = ArrayGrid(arr.shape, block_shape, dtype_name)
        result = SparseBlockArray(grid, system)
        for entry in grid.get_entry_iterator():
            sparse_block = scipy.sparse.csr_matrix(arr[grid.get_slice(entry)])
            target = result.blocks[entry]
            target.oid = system.put(sparse_block)
            # Resolve the numpy type from its name, e.g. "float64".
            target.dtype = getattr(np, dtype_name)
        return result
Пример #5
0
 def empty(cls, shape, block_shape, dtype, system):
     """
     Create a BlockArray whose blocks are produced by system.empty.
     :param shape: Shape of the array.
     :param block_shape: Shape of a block.
     :param dtype: A type exposing __name__, used as the grid's dtype name.
     :param system: System that creates the blocks.
     :return: The new BlockArray.
     """
     grid = ArrayGrid(shape=shape, block_shape=block_shape, dtype=dtype.__name__)
     meta = grid.to_meta()
     result = BlockArray(grid, system)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         result.blocks[entry].oid = system.empty(entry, meta, syskwargs=placement)
     return result
Пример #6
0
 def from_np(cls, arr, block_shape, copy, system):
     """
     Build a BlockArray from an array by storing one slice per grid entry.
     :param arr: Source array.
     :param block_shape: Block shape of the resulting grid.
     :param copy: When truthy, each slice is copied with np.copy before it
         is stored.
     :param system: System whose put() stores each block.
     :return: The populated BlockArray.
     """
     dtype_name = str(arr.dtype)
     grid = ArrayGrid(arr.shape, block_shape, dtype_name)
     result = BlockArray(grid, system)
     for entry in grid.get_entry_iterator():
         view = arr[grid.get_slice(entry)]
         payload = np.copy(view) if copy else view
         target = result.blocks[entry]
         target.oid = system.put(payload)
         # Resolve the numpy type from its name, e.g. "float64".
         target.dtype = getattr(np, dtype_name)
     return result
Пример #7
0
 def get_block_addresses(self, grid: ArrayGrid):
     """
     Assign grid entries to node resource keys, cycling through the nodes.
     :param grid: The ArrayGrid whose entries are mapped to nodes.
     :return: A dict from grid entry to node resource key.
     """
     placement: dict = {}
     candidates = self.nodes()
     cursor = 0
     for entry in grid.get_entry_iterator():
         resources = candidates[cursor]["Resources"]
         # Exactly one resource name per node is expected to contain "node".
         matches = [name for name in resources.keys() if "node" in name]
         assert len(matches) == 1
         placement[entry] = matches[0]
         cursor = (cursor + 1) % len(candidates)
     return placement
Пример #8
0
 def read_csv(self, filename, dtype=float, delimiter=',', has_header=False,
              num_workers=4):
     """
     Read a CSV file in byte-range batches, one BlockArray per non-empty batch.
     :param filename: Path of the CSV file.
     :param dtype: Type forwarded to the block reader; its __name__ is used
         as the grid's dtype name.
     :param delimiter: Field delimiter forwarded to the block reader.
     :param has_header: Header flag forwarded to the block reader.
     :param num_workers: Number of batches the file is split into.
     :return: A list of BlockArrays. Batches whose reported shape has zero
         rows are skipped.
     """
     file_size = storage_utils.get_file_size(filename)
     file_batches: storage_utils.Batch = storage_utils.Batch.from_num_batches(
         file_size, num_workers)
     block_oids = []
     shape_oids = []
     for batch_index, (file_start, file_end) in enumerate(file_batches.batches):
         # Each remote call returns two objects: the block and its shape.
         syskwargs = {
             "grid_entry": (batch_index, ),
             "grid_shape": (num_workers, ),
             "options": {
                 "num_returns": 2
             }
         }
         block_oid, shape_oid = self.system.call(
             "read_csv_block", filename, file_start, file_end,
             dtype, delimiter, has_header, syskwargs=syskwargs)
         block_oids.append(block_oid)
         shape_oids.append(shape_oid)

     # The shapes are fetched so that a grid can be built for every block.
     shapes = self.system.get(shape_oids)
     arrays = []
     for batch_index, shape in enumerate(shapes):
         if shape[0] == 0:
             continue
         block_oid = block_oids[batch_index]
         grid = ArrayGrid(shape=shape, block_shape=shape, dtype=dtype.__name__)
         arr = BlockArray(grid, self.system)
         seen_entry = False
         for entry in grid.get_entry_iterator():
             # The grid is expected to yield a single entry.
             assert not seen_entry
             seen_entry = True
             arr.blocks[entry].oid = block_oid
         arrays.append(arr)
     return arrays
Пример #9
0
 def arange(self, shape, block_shape, step=1, dtype=np.int64) -> BlockArray:
     """
     Create a BlockArray of consecutive values, one range per block.
     :param shape: Shape of the array; ranges follow the first axis.
     :param block_shape: Shape of a block.
     :param step: Must be 1.
     :param dtype: Type of the values; its __name__ is the grid's dtype name.
     :return: The new BlockArray.
     """
     assert step == 1
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     result = BlockArray(grid, self.system)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         # A block starts where the preceding full-size blocks end.
         first = block_shape[0] * entry[0]
         last = first + grid.get_block_shape(entry)[0]
         result.blocks[entry].oid = self.system.arange(
             first, last, step, dtype, syskwargs=placement)
     return result
Пример #10
0
 def _block_map_bop(self, op_name: str, arr_a: BlockArray,
                    arr_b: BlockArray) -> BlockArray:
     """
     Apply a binary system operation to matching blocks of two arrays.
     :param op_name: Name of the attribute of the system that is called on
         each pair of block object ids.
     :param arr_a: Left operand; it determines shape and block shape.
     :param arr_b: Right operand; indexed with the same grid entries.
     :return: A BlockArray holding the per-block results.
     """
     out_shape, out_block_shape = arr_a.shape, arr_a.block_shape
     # NOTE(review): the output type is resolved for the fixed name "log"
     #  rather than for op_name -- confirm this is intended.
     out_dtype = array_utils.get_bop_output_type("log", arr_a.dtype,
                                                 arr_b.dtype)
     assert len(out_shape) == len(out_block_shape)
     grid = ArrayGrid(out_shape, out_block_shape, out_dtype.__name__)
     result = BlockArray(grid, self._system)
     block_op = self._system.__getattribute__(op_name)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         result.blocks[entry].oid = block_op(arr_a.blocks[entry].oid,
                                             arr_b.blocks[entry].oid,
                                             syskwargs=placement)
     return result
Пример #11
0
 def loadtxt(self, fname, dtype=float, comments='# ', delimiter=' ',
             converters=None, skiprows=0, usecols=None, unpack=False,
             ndmin=0, encoding='bytes', max_rows=None, num_workers=4) -> BlockArray:
     """
     Load a text file into a BlockArray with one loadtxt_block call per row batch.

     The row count is derived from the file size, so every data row must
     occupy the same number of characters; the assertions below check that
     the divisions involved are exact.

     :param fname: Path of the text file.
     :param dtype: Element type; float is stored as float64 in the grid.
     :param comments: Comment marker, forwarded to the helpers and the block loader.
     :param delimiter: Column delimiter.
     :param converters: Forwarded to loadtxt_block.
     :param skiprows: Number of leading rows to skip.
     :param usecols: Forwarded to loadtxt_block.
     :param unpack: Forwarded to loadtxt_block.
     :param ndmin: Forwarded to loadtxt_block.
     :param encoding: Forwarded to loadtxt_block.
     :param max_rows: When given, at most this many rows are read.
     :param num_workers: Number of row batches.
     :return: A BlockArray with shape (rows read, number of columns).
     """
     # pylint: disable=unused-variable
     bytes_per_char, bytes_per_row, bytes_per_col, num_cols = storage_utils.get_np_txt_info(
         fname, comments, delimiter
     )
     chars_per_row = bytes_per_row // bytes_per_char
     assert np.allclose(float(chars_per_row), bytes_per_row / bytes_per_char)
     comment_lines, trailing_newlines = storage_utils.get_np_comments(fname, comments)
     # Characters that belong to comments or trailing newlines hold no rows.
     nonrow_chars = trailing_newlines + sum(len(line) for line in comment_lines)
     file_size = storage_utils.get_file_size(fname)
     file_chars = file_size // bytes_per_char
     assert np.allclose(float(file_chars), file_size / bytes_per_char)
     row_chars = file_chars - nonrow_chars
     num_rows = row_chars // chars_per_row
     assert np.allclose(float(num_rows), float(row_chars / chars_per_row))
     num_rows_final = num_rows - skiprows
     if max_rows is not None:
         # Read at most max_rows of the rows left after skipping.
         num_rows_final = min(num_rows_final, max_rows)
     row_batches: storage_utils.Batch = storage_utils.Batch.from_num_batches(num_rows_final,
                                                                             num_workers)
     grid = ArrayGrid(shape=(num_rows_final, num_cols),
                      block_shape=(row_batches.batch_size, num_cols),
                      dtype=np.float64.__name__ if dtype is float else dtype.__name__)
     result: BlockArray = BlockArray(grid, system=self.system)
     for i, grid_entry in enumerate(grid.get_entry_iterator()):
         row_start, row_end = row_batches.batches[i]
         # NOTE(review): one extra row is skipped per batch -- presumably a
         #  leading comment/header line; confirm against loadtxt_block.
         batch_skiprows = skiprows + row_start + 1
         batch_max_rows = grid.get_block_shape(grid_entry)[0]
         # The grid's block height must agree with the batch boundaries.
         assert batch_max_rows == row_end - row_start
         result.blocks[grid_entry].oid = self.loadtxt_block(
             fname, dtype=dtype, comments=comments, delimiter=delimiter,
             converters=converters, skiprows=batch_skiprows,
             usecols=usecols, unpack=unpack, ndmin=ndmin,
             encoding=encoding, max_rows=batch_max_rows,
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             }
         )
     return result
Пример #12
0
 def eye(self, shape: tuple, block_shape: tuple, dtype: np.dtype = None):
     """
     Create a two-dimensional BlockArray from "eye" and "zeros" blocks.
     :param shape: Shape of the array; must have two axes.
     :param block_shape: Shape of a block; must have two axes.
     :param dtype: Element type; np.float64 when None.
     :return: The new BlockArray.
     """
     assert len(shape) == len(block_shape) == 2
     if dtype is None:
         dtype = np.float64
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     meta = grid.to_meta()
     result = BlockArray(grid, self.system)
     for entry in grid.get_entry_iterator():
         # Entries whose coordinates are all equal lie on the block diagonal.
         on_diagonal = np.all(np.diff(entry) == 0)
         block_kind = "eye" if on_diagonal else "zeros"
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         result.blocks[entry].oid = self.system.new_block(
             block_kind, entry, meta, syskwargs=placement)
     return result
Пример #13
0
    def _simple_reshape(self, arr, shape, block_shape):
        """
        Reshape an array block by block, without moving data between blocks.

        Only valid when the old and new shapes differ by axes of size 1 and
        the order of the remaining axes is unchanged.

        :param arr: The array to reshape.
        :param shape: The target shape.
        :param block_shape: The target block shape.
        :return: A new BlockArray with reshaped blocks.
        """
        # Both shapes must have the same number of axes once 1s are stripped.
        assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape))

        grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__)
        # Viewing the source blocks in the new grid shape lets the same
        # grid entry address both the source and the destination block.
        sources = arr.blocks.reshape(grid.grid_shape)
        result = BlockArray(grid, arr.system)
        for entry in grid.get_entry_iterator():
            source: Block = sources[entry]
            target: Block = result.blocks[entry]
            placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
            target.oid = arr.system.reshape(source.oid, target.shape,
                                            syskwargs=placement)
        return result
Пример #14
0
 def _new_array(self, op_name: str, shape: tuple, block_shape: tuple,
                dtype: np.dtype = None):
     """
     Create a BlockArray whose blocks all come from system.new_block.
     :param op_name: Name of the block-creating operation.
     :param shape: Shape of the array.
     :param block_shape: Shape of a block; same number of axes as shape.
     :param dtype: Element type; np.float64 when None.
     :return: The new BlockArray.
     """
     assert len(shape) == len(block_shape)
     if dtype is None:
         dtype = np.float64
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     meta = grid.to_meta()
     result = BlockArray(grid, self._system)
     for entry in grid.get_entry_iterator():
         placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
         result.blocks[entry].oid = self._system.new_block(
             op_name, entry, meta, syskwargs=placement)
     return result
Пример #15
0
 def diag(self, X: BlockArray) -> BlockArray:
     """
     Build a square array from a 1-axis input, or a 1-axis array from a square input.
     :param X: A BlockArray with one or two axes. With two axes, both the
         shape and the block shape must be square.
     :return: A BlockArray with two axes for a 1-axis input, and one axis
         for a 2-axis input.
     :raises ValueError: If X has neither one nor two axes.
     """
     num_axes = len(X.shape)
     if num_axes not in (1, 2):
         raise ValueError("X must have 1 or 2 axes.")

     if num_axes == 1:
         grid = ArrayGrid((X.shape[0], X.shape[0]),
                          (X.block_shape[0], X.block_shape[0]),
                          X.dtype.__name__)
         meta = grid.to_meta()
         result = BlockArray(grid, self.system)
         for entry in grid.get_entry_iterator():
             placement = {"grid_entry": entry, "grid_shape": grid.grid_shape}
             if np.all(np.diff(entry) == 0):
                 # Diagonal block: built from the matching block of X.
                 oid = self.system.diag(X.blocks[entry[0]].oid,
                                        syskwargs=placement)
             else:
                 oid = self.system.new_block("zeros", entry, meta,
                                             syskwargs=placement)
             result.blocks[entry].oid = oid
         return result

     assert X.shape[0] == X.shape[1]
     assert X.block_shape[0] == X.block_shape[1]
     grid = ArrayGrid((X.shape[0],), (X.block_shape[0],), X.dtype.__name__)
     result = BlockArray(grid, self.system)
     for entry in X.grid.get_entry_iterator():
         if not np.all(np.diff(entry) == 0):
             # Only diagonal blocks of X contribute to the output.
             continue
         out_entry = entry[:1]
         placement = {"grid_entry": out_entry,
                      "grid_shape": grid.grid_shape[:1]}
         result.blocks[out_entry].oid = self.system.diag(
             X.blocks[entry].oid, syskwargs=placement)
     return result
Пример #16
0
 def map_uop(self, op_name: str, arr: BlockArray, out: BlockArray = None,
             where=True, args=None, kwargs=None) -> BlockArray:
     """
     Apply an element-wise unary operator to every block of an array.
     :param op_name: Name of the element-wise unary operator.
     :param arr: The input BlockArray.
     :param out: Optional BlockArray that receives the result; it must match
         arr in shape and block shape.
     :param where: Only True is supported.
     :param args: Positional arguments passed to the operator.
     :param kwargs: Keyword arguments passed to the operator.
     :return: out when it is given, otherwise a new BlockArray.
     :raises NotImplementedError: If where is not True.
     """
     if where is not True:
         raise NotImplementedError("'where' argument is not yet supported.")
     if args is None:
         args = ()
     if kwargs is None:
         kwargs = {}
     dtype = array_utils.get_uop_output_type(op_name, arr.dtype)
     assert len(arr.shape) == len(arr.block_shape)
     if out is not None:
         rarr, grid = out, out.grid
         assert rarr.shape == arr.shape and rarr.block_shape == arr.block_shape
     else:
         grid = ArrayGrid(arr.shape, arr.block_shape, dtype.__name__)
         rarr = BlockArray(grid, self.system)
     for entry in grid.get_entry_iterator():
         # TODO(hme): Faster to create ndarray first,
         #  and instantiate block array on return
         #  to avoid instantiating blocks on BlockArray initialization.
         source_block = arr.blocks[entry]
         rarr.blocks[entry] = source_block.uop_map(op_name, args=args,
                                                   kwargs=kwargs)
     return rarr
Пример #17
0
 def from_blocks(cls, arr: np.ndarray, result_shape, system):
     """
     Assemble a BlockArray from an array of blocks, or from a single Block.
     :param arr: An array of Block objects, or one Block.
     :param result_shape: Shape of the result. Ignored (set to ()) for a
         single Block; derived from arr when None.
     :param system: System handed to the BlockArray.
     :return: A BlockArray whose blocks are the given Block objects.
     """
     single_block = isinstance(arr, Block)
     if single_block:
         sample_block = arr
         result_shape = ()
     else:
         # The first block supplies the block shape and dtype of the grid.
         sample_block = arr[tuple(0 for _ in arr.shape)]
         if result_shape is None:
             result_shape = array_utils.shape_from_block_array(arr)
     result_grid = ArrayGrid(shape=result_shape,
                             block_shape=sample_block.shape,
                             dtype=sample_block.dtype.__name__)
     assert arr.shape == result_grid.grid_shape
     result = BlockArray(result_grid, system)
     for entry in result_grid.get_entry_iterator():
         result.blocks[entry] = arr if single_block else arr[entry]
     return result
Пример #18
0
    def reshape(self, shape=None, block_shape=None):
        """
        Reshape by adding or removing one leading or trailing axis of size 1.

        A -1 in shape is only accepted for one-dimensional arrays. It is
        replaced by the array's length, and the matching block_shape entry
        (which must also be -1) by the array's block length.

        :param shape: Target shape; defaults to the current shape.
        :param block_shape: Target block shape; defaults to the current one.
        :return: self when nothing changes, otherwise a new BlockArray.
        :raises Exception: If more than one -1 is given, or if the axis that
            is added or removed is not a leading or trailing axis of size 1.
        """
        # TODO (hme): Add support for arbitrary reshape.
        if shape is None:
            shape = self.shape
        if block_shape is None:
            block_shape = self.block_shape
        if shape == self.shape and block_shape == self.block_shape:
            return self

        requested_shape = shape
        requested_block_shape = block_shape
        shape = []
        block_shape = []
        negative_one = False
        for i, dim in enumerate(requested_shape):
            if dim == -1:
                assert len(self.shape) == 1
                if negative_one:
                    raise Exception("Only one -1 permitted in reshape.")
                negative_one = True
                # self has a single axis here, so that axis supplies the size
                # no matter where the -1 appears in the requested shape.
                shape.append(self.shape[0])
                assert requested_block_shape[i] == -1
                block_shape.append(self.block_shape[0])
            else:
                shape.append(dim)
                block_shape.append(requested_block_shape[i])
        shape = tuple(shape)
        block_shape = tuple(block_shape)

        assert np.prod(shape) == np.prod(self.shape)
        # Make sure the difference is either a preceding or succeeding one.
        if len(shape) > len(self.shape):
            if shape[0] == 1:
                grid_entry_op = "shift"
                assert shape[1:] == self.shape
            elif shape[-1] == 1:
                grid_entry_op = "pop"
                assert shape[:-1] == self.shape
            else:
                raise Exception("Only a leading or trailing axis of size 1 "
                                "can be added by reshape.")
        elif len(shape) < len(self.shape):
            if self.shape[0] == 1:
                grid_entry_op = "prep"
                assert self.shape[1:] == shape
            elif self.shape[-1] == 1:
                grid_entry_op = "app"
                assert self.shape[:-1] == shape
            else:
                raise Exception("Only a leading or trailing axis of size 1 "
                                "can be removed by reshape.")
        else:
            grid_entry_op = "none"
            assert self.shape == shape

        grid = ArrayGrid(shape=shape,
                         block_shape=block_shape,
                         dtype=self.grid.dtype.__name__)
        grid_meta = grid.to_meta()
        rarr = BlockArray(grid, self.system)
        for grid_entry in grid.get_entry_iterator():
            rarr.blocks[grid_entry].oid = self.system.empty(grid_entry, grid_meta,
                                                            syskwargs={
                                                                "grid_entry": grid_entry,
                                                                "grid_shape": grid.grid_shape
                                                            })
            grid_entry_slice = grid.get_slice(grid_entry)
            if grid_entry_op == "shift":
                # New leading axis: write at index 0 of it, read without it.
                grid_entry_slice = tuple([0] + list(grid_entry_slice)[1:])
                self_grid_entry_slice = self.grid.get_slice(grid_entry[1:])
            elif grid_entry_op == "pop":
                # New trailing axis: write at index 0 of it, read without it.
                grid_entry_slice = tuple(list(grid_entry_slice)[:-1] + [0])
                self_grid_entry_slice = self.grid.get_slice(grid_entry[:-1])
            elif grid_entry_op == "prep":
                # Removed leading axis: read from its only grid entry, 0.
                self_grid_entry_slice = self.grid.get_slice(tuple([0] + list(grid_entry)))
            elif grid_entry_op == "app":
                # Removed trailing axis: read from its only grid entry, 0.
                self_grid_entry_slice = self.grid.get_slice(tuple(list(grid_entry) + [0]))
            else:
                assert grid_entry_op == "none"
                self_grid_entry_slice = grid_entry_slice

            # TODO (hme): This is costly.
            rarr[grid_entry_slice] = self[self_grid_entry_slice]
        return rarr
Пример #19
0
    def reduce_axis(self, op_name, axis, keepdims=False):
        """
        Reduce the array along one axis, or along all axes when axis is None.

        Every block is reduced first. The partial results are then combined
        pairwise when op_name has an entry in
        settings.np_pairwise_reduction_map, and with the numpy function of
        the same name otherwise.

        :param op_name: Name of the reduction.
        :param axis: None or an integer axis. A negative axis within range
            counts from the last axis.
        :param keepdims: When true, reduced axes are kept with size 1.
        :return: A BlockArray holding the reduced result.
        :raises NotImplementedError: If axis is neither None nor an integer.
        """
        if not (axis is None or isinstance(axis, (int, np.int32, np.int64))):
            raise NotImplementedError("Only integer axis is currently supported.")
        num_axes = len(self.shape)
        if axis is not None and -num_axes <= axis < 0:
            # The comparisons and list slicing below need a non-negative axis.
            axis += num_axes

        result_blocks = np.empty_like(self.blocks, dtype=Block)
        for grid_entry in self.grid.get_entry_iterator():
            result_blocks[grid_entry] = self.blocks[grid_entry].reduce_axis(op_name,
                                                                            axis,
                                                                            keepdims=keepdims)

        # Reduced axes either collapse to size 1 (keepdims) or disappear.
        result_shape = []
        result_block_shape = []
        for curr_axis in range(num_axes):
            axis_size, axis_block_size = self.shape[curr_axis], self.block_shape[curr_axis]
            if curr_axis == axis or axis is None:
                if keepdims:
                    axis_size, axis_block_size = 1, 1
                else:
                    continue
            result_shape.append(axis_size)
            result_block_shape.append(axis_block_size)
        result_shape = tuple(result_shape)
        result_block_shape = tuple(result_block_shape)
        result_dtype = array_utils.get_reduce_output_type(op_name, self.dtype)
        result_grid = ArrayGrid(shape=result_shape,
                                block_shape=result_block_shape,
                                dtype=result_dtype.__name__)
        result = BlockArray(result_grid, self.system)

        if op_name in settings.np_pairwise_reduction_map:
            # Do a pairwise reduction with the pairwise reduction op.
            pairwise_op_name = settings.np_pairwise_reduction_map.get(op_name, op_name)
            if axis is None:
                # Fold all per-block results into a single block.
                reduced_block: Block = None
                for grid_entry in self.grid.get_entry_iterator():
                    next_block = result_blocks[grid_entry]
                    if reduced_block is None:
                        reduced_block = next_block
                    else:
                        reduced_block = reduced_block.bop(pairwise_op_name, next_block, {})
                if result.shape == ():
                    result.blocks[()] = reduced_block
                else:
                    result.blocks[:] = reduced_block
            else:
                for result_grid_entry in result_grid.get_entry_iterator():
                    reduced_block: Block = None
                    for sum_dim in range(self.grid.grid_shape[axis]):
                        # Rebuild the source entry: overwrite the kept axis,
                        # or re-insert the axis that was dropped.
                        grid_entry = list(result_grid_entry)
                        if keepdims:
                            grid_entry[axis] = sum_dim
                        else:
                            grid_entry = grid_entry[:axis] + [sum_dim] + grid_entry[axis:]
                        next_block: Block = result_blocks[tuple(grid_entry)]
                        if reduced_block is None:
                            reduced_block = next_block
                        else:
                            reduced_block = reduced_block.bop(pairwise_op_name, next_block, {})
                    result.blocks[result_grid_entry] = reduced_block
        else:
            op_func = getattr(np, op_name)
            if result.shape == ():
                result.blocks[()] = op_func(result_blocks, axis=axis, keepdims=keepdims)
            else:
                result.blocks = op_func(result_blocks, axis=axis, keepdims=keepdims)
        return result