示例#1
0
    def test_export_rectangles(self):
        nd = np.arange(0, 80, dtype=float).reshape(8, 10)

        rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]

        rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]

        rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
                  [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
                  [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

        for rects in [rects1, rects2, rects3]:
            for block_size in [3, 4, 10]:
                rect_path = new_local_temp_dir()
                rect_uri = local_path_uri(rect_path)

                bm = BlockMatrix.from_numpy(nd, block_size=block_size)
                bm.export_rectangles(rect_uri, rects)

                self._assert_rectangles_eq(nd, rect_path, rects)

                rect_path_bytes = new_local_temp_dir()
                rect_uri_bytes = local_path_uri(rect_path_bytes)

                bm.export_rectangles(rect_uri_bytes, rects, binary=True)
                self._assert_rectangles_eq(nd, rect_path_bytes, rects, binary=True)
示例#2
0
    def test_export_rectangles(self):
        nd = np.arange(0, 80, dtype=float).reshape(8, 10)

        rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]

        rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]

        rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3], [3, 5, 3, 6],
                  [3, 6, 3, 7], [3, 7, 3, 8], [4, 5, 0, 10], [0, 8, 4, 5],
                  [0, 8, 0, 10]]

        for rects in [rects1, rects2, rects3]:
            for block_size in [3, 4, 10]:
                bm_uri = new_temp_file()

                rect_path = new_local_temp_dir()
                rect_uri = local_path_uri(rect_path)

                (BlockMatrix.from_numpy(
                    nd,
                    block_size=block_size).sparsify_rectangles(rects).write(
                        bm_uri, force_row_major=True))

                BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)

                for (i, r) in enumerate(rects):
                    file = rect_path + '/rect-' + str(i) + '_' + '-'.join(
                        map(str, r))
                    expected = nd[r[0]:r[1], r[2]:r[3]]
                    actual = np.loadtxt(file, ndmin=2)
                    self._assert_eq(expected, actual)

                rect_path_bytes = new_local_temp_dir()
                rect_uri_bytes = local_path_uri(rect_path_bytes)

                BlockMatrix.export_rectangles(bm_uri,
                                              rect_uri_bytes,
                                              rects,
                                              binary=True)

                for (i, r) in enumerate(rects):
                    file = rect_path_bytes + '/rect-' + str(
                        i) + '_' + '-'.join(map(str, r))
                    expected = nd[r[0]:r[1], r[2]:r[3]]
                    actual = np.reshape(np.fromfile(file),
                                        (r[1] - r[0], r[3] - r[2]))
                    self._assert_eq(expected, actual)

        bm_uri = new_temp_file()
        rect_uri = new_temp_file()

        (BlockMatrix.from_numpy(nd, block_size=5).sparsify_rectangles(
            [[0, 1, 0, 1]]).write(bm_uri, force_row_major=True))

        with self.assertRaises(FatalError) as e:
            BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])
            self.assertEquals(
                e.msg,
                'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]'
            )
示例#3
0
    def test_export_rectangles(self):
        nd = np.arange(0, 80, dtype=float).reshape(8, 10)

        rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]

        rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]

        rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3], [3, 5, 3, 6],
                  [3, 6, 3, 7], [3, 7, 3, 8], [4, 5, 0, 10], [0, 8, 4, 5],
                  [0, 8, 0, 10]]

        for rects in [rects1, rects2, rects3]:
            for block_size in [3, 4, 10]:
                rect_path = new_local_temp_dir()
                rect_uri = local_path_uri(rect_path)

                bm = BlockMatrix.from_numpy(nd, block_size=block_size)
                bm.export_rectangles(rect_uri, rects)

                self._assert_rectangles_eq(nd, rect_path, rects)

                rect_path_bytes = new_local_temp_dir()
                rect_uri_bytes = local_path_uri(rect_path_bytes)

                bm.export_rectangles(rect_uri_bytes, rects, binary=True)
                self._assert_rectangles_eq(nd,
                                           rect_path_bytes,
                                           rects,
                                           binary=True)
示例#4
0
    def test_export_rectangles(self):
        nd = np.arange(0, 80, dtype=float).reshape(8, 10)

        rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]

        rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]

        rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
                  [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
                  [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

        for rects in [rects1, rects2, rects3]:
            for block_size in [3, 4, 10]:
                bm_uri = new_temp_file()

                rect_path = new_local_temp_dir()
                rect_uri = local_path_uri(rect_path)

                (BlockMatrix.from_numpy(nd, block_size=block_size)
                    .sparsify_rectangles(rects)
                    .write(bm_uri, force_row_major=True))

                BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)

                for (i, r) in enumerate(rects):
                    file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                    expected = nd[r[0]:r[1], r[2]:r[3]]
                    actual = np.loadtxt(file, ndmin = 2)
                    self._assert_eq(expected, actual)

                rect_path_bytes = new_local_temp_dir()
                rect_uri_bytes = local_path_uri(rect_path_bytes)

                BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)

                for (i, r) in enumerate(rects):
                    file = rect_path_bytes + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                    expected = nd[r[0]:r[1], r[2]:r[3]]
                    actual = np.reshape(np.fromfile(file), (r[1] - r[0], r[3] - r[2]))
                    self._assert_eq(expected, actual)

        bm_uri = new_temp_file()
        rect_uri = new_temp_file()

        (BlockMatrix.from_numpy(nd, block_size=5)
            .sparsify_rectangles([[0, 1, 0, 1]])
            .write(bm_uri, force_row_major=True))

        with self.assertRaises(FatalError) as e:
            BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])
            self.assertEquals(e.msg, 'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]')
示例#5
0
    def to_numpy(self):
        """Collects the block matrix into a `NumPy ndarray
        <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__.

        Examples
        --------

        >>> from hail.linalg import BlockMatrix
        >>> bm = BlockMatrix.random(10, 20)
        >>> a = bm.to_numpy()

        Notes
        -----
        The number of entries must be less than :math:`2^{31}`.

        The resulting ndarray will have the same shape as the block matrix.

        Returns
        -------
        :class:`numpy.ndarray`
        """
        path = new_local_temp_file()
        uri = local_path_uri(path)
        self.tofile(uri)
        return np.fromfile(path).reshape((self.n_rows, self.n_cols))
示例#6
0
def _jarray_from_ndarray(nd):
    if nd.size >= (1 << 31):
        raise ValueError(f'size of ndarray must be less than 2^31, found {nd.size}')

    nd = _ndarray_as_float64(nd)
    path = new_local_temp_file()
    uri = local_path_uri(path)
    nd.tofile(path)
    return Env.hail().utils.richUtils.RichArray.importFromDoubles(Env.hc()._jhc, uri, nd.size)
示例#7
0
    def test_export_blocks(self):
        nd = np.ones(shape=(8, 10))
        bm = BlockMatrix.from_numpy(nd, block_size=20)

        bm_path = new_local_temp_dir()
        bm_uri = local_path_uri(bm_path)
        bm.export_blocks(bm_uri, binary=True)
        actual = BlockMatrix.rectangles_to_numpy(bm_path, binary=True)

        self._assert_eq(nd, actual)
示例#8
0
 def copy_log(self, path: str) -> None:
     log = Env.hc()._log
     try:
         if self.is_dir(path):
             _, tail = os.path.split(log)
             path = os.path.join(path, tail)
         info(f"copying log to {repr(path)}...")
         self.copy(local_path_uri(Env.hc()._log), path)
     except Exception as e:
         sys.stderr.write(f'Could not copy log: encountered error:\n  {e}')
示例#9
0
    def test_export_blocks(self):
        nd = np.ones(shape=(8, 10))
        bm = BlockMatrix.from_numpy(nd, block_size=20)

        bm_path = new_local_temp_dir()
        bm_uri = local_path_uri(bm_path)
        bm.export_blocks(bm_uri, binary=True)
        actual = BlockMatrix.rectangles_to_numpy(bm_path, binary=True)

        self._assert_eq(nd, actual)
示例#10
0
文件: fs.py 项目: jigold/hail
 def copy_log(self, path: str) -> None:
     log = Env.hc()._log
     try:
         if self.is_dir(path):
             _, tail = os.path.split(log)
             path = os.path.join(path, tail)
         info(f"copying log to {repr(path)}...")
         self.copy(local_path_uri(Env.hc()._log), path)
     except Exception as e:
         sys.stderr.write(f'Could not copy log: encountered error:\n  {e}')
示例#11
0
    def test_rectangles_to_numpy(self):
        nd = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])

        rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

        rect_path = new_local_temp_dir()
        rect_uri = local_path_uri(rect_path)
        BlockMatrix.from_numpy(nd).export_rectangles(rect_uri, rects)

        rect_bytes_path = new_local_temp_dir()
        rect_bytes_uri = local_path_uri(rect_bytes_path)
        BlockMatrix.from_numpy(nd).export_rectangles(rect_bytes_uri,
                                                     rects,
                                                     binary=True)

        expected = np.array([[1.0, 0.0], [4.0, 5.0], [7.0, 0.0]])
        self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_path))
        self._assert_eq(
            expected,
            BlockMatrix.rectangles_to_numpy(rect_bytes_path, binary=True))
示例#12
0
    def test_rectangles_to_numpy(self):
        nd = np.array([[1.0, 2.0, 3.0],
                       [4.0, 5.0, 6.0],
                       [7.0, 8.0, 9.0]])

        rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

        rect_path = new_local_temp_dir()
        rect_uri = local_path_uri(rect_path)
        BlockMatrix.from_numpy(nd).export_rectangles(rect_uri, rects)

        rect_bytes_path = new_local_temp_dir()
        rect_bytes_uri = local_path_uri(rect_bytes_path)
        BlockMatrix.from_numpy(nd).export_rectangles(rect_bytes_uri, rects, binary=True)

        expected = np.array([[1.0, 0.0],
                             [4.0, 5.0],
                             [7.0, 0.0]])
        self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_path))
        self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(rect_bytes_path, binary=True))
示例#13
0
    def test_export_rectangles_filtered(self):
        rect_path = new_local_temp_dir()
        rect_uri = local_path_uri(rect_path)
        nd = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                       [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])
        bm = BlockMatrix.from_numpy(nd)
        bm = bm[1:3, 1:3]
        export_rects = [[0, 1, 0, 2], [1, 2, 0, 2]]
        bm.export_rectangles(rect_uri, export_rects)

        expected = np.array([[6.0, 7.0], [10.0, 11.0]])

        self._assert_rectangles_eq(expected, rect_path, export_rects)
示例#14
0
    def from_numpy(cls, ndarray, block_size=None):
        """Distributes a `NumPy ndarray
        <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__
        as a block matrix.

        Examples
        --------

        >>> import numpy as np
        >>> a = np.random.rand(10, 20)
        >>> bm = BlockMatrix.from_numpy(a)

        Notes
        -----
        The ndarray must have two dimensions, each of non-zero size.

        The number of entries must be less than :math:`2^{31}`.

        Parameters
        ----------
        ndarray: :class:`numpy.ndarray`
            ndarray with two dimensions, each of non-zero size.
        block_size: :obj:`int`, optional
            Block size. Default given by :meth:`default_block_size`.

        Returns
        -------
        :class:`.BlockMatrix`
        """
        if not block_size:
            block_size = BlockMatrix.default_block_size()

        if ndarray.ndim != 2:
            raise FatalError(
                "from_numpy: ndarray must have two axes, found shape {}".
                format(ndarray.shape))
        n_rows, n_cols = ndarray.shape
        if n_rows == 0 or n_cols == 0:
            raise FatalError(
                "from_numpy: ndarray dimensions must be non-zero, found shape {}"
                .format(ndarray.shape))
        if ndarray.dtype != np.float64:
            ndarray = ndarray.astype(np.float64)

        local_temp_dir = new_local_temp_dir()
        path = local_temp_dir + '/binary'
        uri = local_path_uri(path)

        ndarray.tofile(path)
        return cls.fromfile(uri, n_rows, n_cols, block_size)
示例#15
0
    def test_export_rectangles_sparse(self):
        rect_path = new_local_temp_dir()
        rect_uri = local_path_uri(rect_path)
        nd = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                       [9.0, 10.0, 11.0, 12.0], [13.0, 14.0, 15.0, 16.0]])
        bm = BlockMatrix.from_numpy(nd, block_size=2)
        sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
        export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]
        bm.sparsify_rectangles(sparsify_rects).export_rectangles(
            rect_uri, export_rects)

        expected = np.array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0],
                             [9.0, 10.0, 0.0, 0.0], [13.0, 14.0, 0.0, 0.0]])

        self._assert_rectangles_eq(expected, rect_path, export_rects)
示例#16
0
    def from_numpy(cls, ndarray, block_size=None):
        """Distributes a `NumPy ndarray
        <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__
        as a block matrix.

        Examples
        --------

        >>> import numpy as np
        >>> a = np.random.rand(10, 20)
        >>> bm = BlockMatrix.from_numpy(a)

        Notes
        -----
        The ndarray must have two dimensions, each of non-zero size.

        The number of entries must be less than :math:`2^{31}`.

        Parameters
        ----------
        ndarray: :class:`numpy.ndarray`
            ndarray with two dimensions, each of non-zero size.
        block_size: :obj:`int`, optional
            Block size. Default given by :meth:`default_block_size`.

        Returns
        -------
        :class:`.BlockMatrix`
        """
        if not block_size:
            block_size = BlockMatrix.default_block_size()

        if any(i == 0 for i in ndarray.shape):
            raise ValueError(
                f'from_numpy: ndarray dimensions must be non-zero, found shape {ndarray.shape}'
            )

        nd = _ndarray_as_2d(ndarray)
        nd = _ndarray_as_float64(nd)
        n_rows, n_cols = nd.shape

        path = new_local_temp_file()
        uri = local_path_uri(path)
        nd.tofile(path)
        return cls.fromfile(uri, n_rows, n_cols, block_size)
示例#17
0
def copy_log(path: str) -> None:
    """Attempt to copy the session log to a hadoop-API-compatible location.

    Examples
    --------
    Specify a manual path:

    >>> hl.copy_log('gs://my-bucket/analysis-10-jan19.log')  # DOCTEST: +SKIP
    INFO: copying log to 'gs://my-bucket/analysis-10-jan19.log'...

    Copy to a directory:

    >>> hl.copy_log('gs://my-bucket/')  # DOCTEST: +SKIP
    INFO: copying log to 'gs://my-bucket/hail-20180924-2018-devel-46e5fad57524.log'...

    Notes
    -----
    Since Hail cannot currently log directly to distributed file systems, this
    function is provided as a utility for offloading logs from ephemeral nodes.

    If `path` is a directory, then the log file will be copied using its
    base name to the directory (e.g. ``/home/hail.log`` would be copied as
    ``gs://my-bucket/hail.log`` if `path` is ``gs://my-bucket``.

    Parameters
    ----------
    path: :obj:`str`
    """
    log = Env.hc()._log
    try:
        if hadoop_is_dir(path):
            _, tail = os.path.split(log)
            path = os.path.join(path, tail)
        info(f"copying log to {repr(path)}...")
        hadoop_copy(local_path_uri(Env.hc()._log), path)
    except Exception as e:
        sys.stderr.write(f'Could not copy log: encountered error:\n  {e}')
示例#18
0
def _ndarray_from_jarray(ja):
    path = new_local_temp_file()
    uri = local_path_uri(path)
    Env.hail().utils.richUtils.RichArray.exportToDoubles(Env.hc()._jhc, uri, ja)
    return np.fromfile(path)