Пример #1
0
 def arange(self, shape, block_shape, step=1, dtype=np.int64) -> BlockArray:
     """Create a BlockArray whose blocks are filled by per-block range calls.

     For every entry of the grid, the range bounds are derived from the
     entry's position along the first axis and handed to
     ``self.system.arange``, which produces the block's contents.

     Args:
         shape: Shape of the full array, passed to ``ArrayGrid``.
         block_shape: Shape of a single block; only ``block_shape[0]`` is
             used to compute range offsets.
         step: Range step. Only ``1`` is supported (checked by an assert).
         dtype: Element type; its ``__name__`` is given to ``ArrayGrid`` and
             the type itself is forwarded to ``self.system.arange``.

     Returns:
         The BlockArray whose block object ids were set from the system calls.
     """
     assert step == 1
     grid = ArrayGrid(shape, block_shape, dtype.__name__)
     rarr = BlockArray(grid, self.system)
     # One system-level range call per block of the grid.
     for grid_entry in grid.get_entry_iterator():
         # Offset of this block along the first axis, in elements.
         block_start = block_shape[0] * grid_entry[0]
         # The block's real extent is used so a smaller final block gets a
         # correspondingly shorter range.
         block_stop = block_start + grid.get_block_shape(grid_entry)[0]
         rarr.blocks[grid_entry].oid = self.system.arange(
             block_start, block_stop, step, dtype,
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape,
             })
     return rarr
Пример #2
0
 def loadtxt(self, fname, dtype=float, comments='# ', delimiter=' ',
             converters=None, skiprows=0, usecols=None, unpack=False,
             ndmin=0, encoding='bytes', max_rows=None, num_workers=4) -> BlockArray:
     """Load a text file into a BlockArray, one batch of rows per block.

     The number of rows is computed from the file size rather than by reading
     the file: the characters taken up by comment lines and trailing newlines
     are subtracted and the remainder is divided by the per-row character
     count. This requires every data row to have the same width; the asserts
     below check that each division is exact.

     Args:
         fname: Path of the text file to read.
         dtype: Element type. ``float`` is recorded in the grid as
             ``np.float64``; any other type is recorded by its ``__name__``.
         comments: Comment marker, passed to the ``storage_utils`` helpers
             and to ``loadtxt_block``.
         delimiter: Column delimiter, passed to
             ``storage_utils.get_np_txt_info`` and to ``loadtxt_block``.
         converters: Forwarded unchanged to ``loadtxt_block``.
         skiprows: Number of leading data rows to leave out of the result.
         usecols: Forwarded unchanged to ``loadtxt_block``.
         unpack: Forwarded unchanged to ``loadtxt_block``.
         ndmin: Forwarded unchanged to ``loadtxt_block``.
         encoding: Forwarded unchanged to ``loadtxt_block``.
         max_rows: Optional upper bound on the number of rows loaded after
             ``skiprows``. ``None`` loads all remaining rows.
         num_workers: Number of row batches requested from
             ``storage_utils.Batch.from_num_batches``.

     Returns:
         A BlockArray of shape ``(rows, num_cols)`` whose blocks are produced
         by ``self.loadtxt_block``.
     """
     # pylint: disable=unused-variable
     bytes_per_char, bytes_per_row, bytes_per_col, num_cols = storage_utils.get_np_txt_info(
         fname, comments, delimiter
     )
     chars_per_row = bytes_per_row // bytes_per_char
     assert np.allclose(float(chars_per_row), bytes_per_row / bytes_per_char)
     comment_lines, trailing_newlines = storage_utils.get_np_comments(fname, comments)
     # Characters that do not belong to any data row.
     nonrow_chars = trailing_newlines + sum(len(line) for line in comment_lines)
     file_size = storage_utils.get_file_size(fname)
     file_chars = file_size // bytes_per_char
     assert np.allclose(float(file_chars), file_size / bytes_per_char)
     row_chars = file_chars - nonrow_chars
     num_rows = row_chars // chars_per_row
     assert np.allclose(float(num_rows), float(row_chars / chars_per_row))
     num_rows_final = num_rows - skiprows
     if max_rows is not None:
         # Clamp to the caller's limit. The row count must stay an integer
         # because it is used below as a batch total and as a grid dimension.
         num_rows_final = min(num_rows_final, max_rows)
     row_batches: storage_utils.Batch = storage_utils.Batch.from_num_batches(num_rows_final,
                                                                             num_workers)
     grid = ArrayGrid(shape=(num_rows_final, num_cols),
                      block_shape=(row_batches.batch_size, num_cols),
                      dtype=np.float64.__name__ if dtype is float else dtype.__name__)
     result: BlockArray = BlockArray(grid, system=self.system)
     for i, grid_entry in enumerate(grid.get_entry_iterator()):
         row_start, row_end = row_batches.batches[i]
         # NOTE(review): the extra 1 presumably skips a single header/comment
         # line at the top of the file -- confirm against the file layout
         # expected by storage_utils.get_np_comments.
         batch_skiprows = skiprows + row_start + 1
         batch_max_rows = grid.get_block_shape(grid_entry)[0]
         # The grid's block height must agree with the batch boundaries.
         assert batch_max_rows == row_end - row_start
         result.blocks[grid_entry].oid = self.loadtxt_block(
             fname, dtype=dtype, comments=comments, delimiter=delimiter,
             converters=converters, skiprows=batch_skiprows,
             usecols=usecols, unpack=unpack, ndmin=ndmin,
             encoding=encoding, max_rows=batch_max_rows,
             syskwargs={
                 "grid_entry": grid_entry,
                 "grid_shape": grid.grid_shape
             }
         )
     return result