Exemplo n.º 1
0
    def arange(self,
               start_in,
               shape,
               block_shape,
               step=1,
               dtype=None) -> BlockArray:
        """Create a block-partitioned range of consecutive values.

        Only the first axis of ``shape`` / ``block_shape`` is consulted: each
        block covers ``[start, stop)`` where ``start`` is offset from
        ``start_in`` by the block's position along axis 0.

        Args:
            start_in: First value of the overall range.
            shape: Shape of the resulting array; ``shape[0]`` is its length.
            block_shape: Shape of a single block; ``block_shape[0]`` is the
                nominal number of elements per block.
            step: Spacing between values. Only ``1`` is supported (asserted).
            dtype: NumPy scalar type for the result. When ``None`` it is
                inferred with ``np.result_type`` from the first and
                one-past-last values.

        Returns:
            A ``BlockArray`` whose blocks each hold the object id returned by
            ``self.cm.arange`` for that block's sub-range.
        """
        assert step == 1
        if dtype is None:
            # Map the inferred dtype's string name (e.g. "int64") back to the
            # NumPy scalar type of the same name.
            inferred = np.result_type(start_in, shape[0] + start_in)
            dtype = getattr(np, str(inferred))

        # One cm.arange call per block of the grid.
        grid = ArrayGrid(shape, block_shape, dtype.__name__)
        rarr = BlockArray(grid, self.cm)
        for grid_entry in grid.get_entry_iterator():
            options = dict(grid_entry=grid_entry, grid_shape=grid.grid_shape)
            block_start = start_in + block_shape[0] * grid_entry[0]
            # The actual block length comes from the grid, not block_shape.
            block_len = grid.get_block_shape(grid_entry)[0]
            block_stop = block_start + block_len
            rarr.blocks[grid_entry].oid = self.cm.arange(
                block_start, block_stop, step, dtype, syskwargs=options)
        return rarr
Exemplo n.º 2
0
 def loadtxt(self,
             fname,
             dtype=float,
             comments='# ',
             delimiter=' ',
             converters=None,
             skiprows=0,
             usecols=None,
             unpack=False,
             ndmin=0,
             encoding='bytes',
             max_rows=None,
             num_workers=4) -> BlockArray:
     """Load a text file into a row-partitioned ``BlockArray``.

     The number of data rows is derived from the file size: the character
     count left after removing comment lines and trailing newlines is
     divided by the characters per row. The asserts require each of these
     divisions to be exact.

     Args:
         fname: Path of the text file to read.
         dtype: Element type; the builtin ``float`` is recorded in the grid
             as ``float64``, any other type by its ``__name__``.
         comments: Comment marker, forwarded to the storage helpers and to
             ``loadtxt_block``.
         delimiter: Column delimiter, forwarded likewise.
         converters: Forwarded unchanged to ``loadtxt_block``.
         skiprows: Number of leading data rows to skip.
         usecols: Forwarded unchanged to ``loadtxt_block``.
         unpack: Forwarded unchanged to ``loadtxt_block``.
         ndmin: Forwarded unchanged to ``loadtxt_block``.
         encoding: Forwarded unchanged to ``loadtxt_block``.
         max_rows: Optional upper bound on the number of rows to load.
         num_workers: Number of row batches to split the load into.

     Returns:
         A ``BlockArray`` with one block per row batch; each block holds
         the object id returned by ``self.loadtxt_block``.
     """
     # pylint: disable=unused-variable
     bytes_per_char, bytes_per_row, bytes_per_col, num_cols = storage_utils.get_np_txt_info(
         fname, comments, delimiter)
     chars_per_row = bytes_per_row // bytes_per_char
     assert np.allclose(float(chars_per_row),
                        bytes_per_row / bytes_per_char)

     # Characters that do not belong to data rows: trailing newlines plus
     # the full length of every comment line.
     comment_lines, trailing_newlines = storage_utils.get_np_comments(
         fname, comments)
     nonrow_chars = trailing_newlines + sum(
         len(line) for line in comment_lines)

     file_size = storage_utils.get_file_size(fname)
     file_chars = file_size // bytes_per_char
     assert np.allclose(float(file_chars), file_size / bytes_per_char)
     row_chars = file_chars - nonrow_chars
     num_rows = row_chars // chars_per_row
     assert np.allclose(float(num_rows), float(row_chars / chars_per_row))

     num_rows_final = num_rows - skiprows
     if max_rows is not None:
         # Cap the row count at max_rows. This must stay an int: it is used
         # below as a batch total and as the first grid dimension.
         num_rows_final = min(num_rows_final, max_rows)

     row_batches: storage_utils.Batch = storage_utils.Batch.from_num_batches(
         num_rows_final, num_workers)
     grid = ArrayGrid(
         shape=(num_rows_final, num_cols),
         block_shape=(row_batches.batch_size, num_cols),
         dtype=np.float64.__name__ if dtype is float else dtype.__name__)
     result: BlockArray = BlockArray(grid, cm=self.cm)
     for i, grid_entry in enumerate(grid.get_entry_iterator()):
         row_start, row_end = row_batches.batches[i]
         # Each block skips the caller's rows plus all rows owned by
         # earlier batches, then reads exactly its own row count.
         batch_skiprows = skiprows + row_start
         batch_max_rows = grid.get_block_shape(grid_entry)[0]
         assert batch_max_rows == row_end - row_start
         result.blocks[grid_entry].oid = self.loadtxt_block(
             fname,
             dtype=dtype,
             comments=comments,
             delimiter=delimiter,
             converters=converters,
             skiprows=batch_skiprows,
             usecols=usecols,
             unpack=unpack,
             ndmin=ndmin,
             encoding=encoding,
             max_rows=batch_max_rows,
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             })
     return result