def test_export_block_matrices(self):
        data = [np.random.rand(11 * 12), np.random.rand(5 * 17)]
        arrs = [data[0].reshape((11, 12)), data[1].reshape((5, 17))]
        bms = [
            hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4),
            hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8),
        ]
        with hl.TemporaryDirectory() as prefix:
            hl.experimental.export_block_matrices(bms, f'{prefix}/files')
            for i in range(len(bms)):
                a = arrs[i]
                a2 = np.loadtxt(
                    hl.current_backend().fs.open(f'{prefix}/files/{i}.tsv'))
                self.assertTrue(np.array_equal(a, a2))

        with hl.TemporaryDirectory() as prefix2:
            custom_names = ["nameA", "inner/nameB.tsv"]
            hl.experimental.export_block_matrices(
                bms, f'{prefix2}/files', custom_filenames=custom_names)
            for i in range(len(bms)):
                a = arrs[i]
                a2 = np.loadtxt(hl.current_backend().fs.open(
                    f'{prefix2}/files/{custom_names[i]}'))
                self.assertTrue(np.array_equal(a, a2))
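For reference, a minimal standalone sketch of the same export round-trip (assuming an initialized Hail context; as the test above shows, files are named by index unless custom_filenames is passed):

import numpy as np
import hail as hl
from hail.linalg import BlockMatrix

a = np.arange(12, dtype=np.float64).reshape((3, 4))
with hl.TemporaryDirectory() as d:
    # One TSV per matrix, named '0.tsv', '1.tsv', ... by default.
    hl.experimental.export_block_matrices([BlockMatrix.from_numpy(a)], f'{d}/out')
    b = np.loadtxt(hl.current_backend().fs.open(f'{d}/out/0.tsv'))
    assert np.array_equal(a, b)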
Example #2
    def test_hadoop_mkdir_p(self):
        test_text = "HELLO WORLD"

        with hadoop_open(resource('./some/foo/bar.txt'), 'w') as out:
            out.write(test_text)

        self.assertTrue(hl.hadoop_exists(resource('./some/foo/bar.txt')))

        with hadoop_open(resource('./some/foo/bar.txt')) as f:
            assert f.read() == test_text

        hl.current_backend().fs.rmtree(resource('./some'))
Example #3
    def test_to_from_numpy(self):
        n_rows = 10
        n_cols = 11
        data = np.random.rand(n_rows * n_cols)

        bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
        a = data.reshape((n_rows, n_cols))

        with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f:
            bm.tofile(bm_f)
            a.tofile(a_f)

            a1 = bm.to_numpy()
            a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy()
            a3 = np.frombuffer(
                hl.current_backend().fs.open(bm_f, mode='rb').read()
            ).reshape((n_rows, n_cols))
            a4 = BlockMatrix.fromfile(a_f, n_rows, n_cols, block_size=3).to_numpy()
            a5 = BlockMatrix.fromfile(bm_f, n_rows, n_cols).to_numpy()

            self._assert_eq(a1, a)
            self._assert_eq(a2, a)
            self._assert_eq(a3, a)
            self._assert_eq(a4, a)
            self._assert_eq(a5, a)

        bmt = bm.T
        at = a.T

        with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f:
            bmt.tofile(bmt_f)
            at.tofile(at_f)

            at1 = bmt.to_numpy()
            at2 = BlockMatrix.from_numpy(at).to_numpy()
            at3 = np.frombuffer(
                hl.current_backend().fs.open(bmt_f, mode='rb').read()
            ).reshape((n_cols, n_rows))
            at4 = BlockMatrix.fromfile(at_f, n_cols, n_rows).to_numpy()
            at5 = BlockMatrix.fromfile(bmt_f, n_cols, n_rows).to_numpy()

            self._assert_eq(at1, at)
            self._assert_eq(at2, at)
            self._assert_eq(at3, at)
            self._assert_eq(at4, at)
            self._assert_eq(at5, at)

        self._assert_eq(bm.to_numpy(_force_blocking=True), a)
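The np.frombuffer round-trips above work because BlockMatrix.tofile writes the entries as row-major float64, the same layout numpy.ndarray.tofile produces; a compact sketch of just that invariant (assuming an initialized Hail context):

import numpy as np
import hail as hl
from hail.linalg import BlockMatrix

a = np.arange(6, dtype=np.float64).reshape((2, 3))
with hl.TemporaryFilename() as f:
    BlockMatrix.from_numpy(a).tofile(f)
    raw = hl.current_backend().fs.open(f, mode='rb').read()
    # The raw bytes reinterpret directly as the original row-major array.
    assert np.array_equal(np.frombuffer(raw).reshape(2, 3), a)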
Example #4

 def save(self):
     fs = hl.current_backend().fs
     try:
         backup_path = self.save_path + '.bak'
         if fs.exists(self.save_path):
             fs.copy(self.save_path, backup_path)
         with fs.open(self.save_path, 'w') as out:
             json.dump(self, out, indent=2, cls=Encoder)
         if fs.exists(backup_path):
             fs.remove(backup_path)
     except OSError as e:
         # These messages are printed rather than logged because there is no
         # guarantee that the Hail context is in a sane state if any of the
         # operations above fail.
         print(
             f'Failed saving {self.__class__.__name__} state at {self.save_path}'
         )
         print(
             f'An attempt was made to copy {self.save_path} to {backup_path}'
         )
         print('An old version of this state may be there.')
         print(
             'Dumping current state as json to standard output, you may wish '
             'to save this output in order to resume the combiner.')
         json.dump(self, sys.stdout, indent=2, cls=Encoder)
         print()
         raise e
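The backup-then-write-then-clean-up pattern in save generalizes to any filesystem API; a minimal standalone sketch using only the standard library (the helper name is hypothetical):

import json
import os
import shutil

def save_json_with_backup(obj, save_path, encoder=None):
    # Keep a .bak copy of any existing file until the new write succeeds,
    # so a failure mid-write cannot destroy the only copy of the state.
    backup_path = save_path + '.bak'
    if os.path.exists(save_path):
        shutil.copy(save_path, backup_path)
    with open(save_path, 'w') as out:
        json.dump(obj, out, indent=2, cls=encoder)
    if os.path.exists(backup_path):
        os.remove(backup_path)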
Example #5

def set_query_name(request):
    backend = current_backend()
    if isinstance(backend, ServiceBackend):
        backend.batch_attributes = dict(name=request.node.name)
        yield
        backend.batch_attributes = dict()
    else:
        yield
Example #6

 def load(path) -> 'VariantDatasetCombiner':
     fs = hl.current_backend().fs
     with fs.open(path) as stream:
         combiner = json.load(stream, cls=Decoder)
         if combiner.save_path != path:
             warning(
                 'path/save_path mismatch in loaded VariantDatasetCombiner, using '
                 f'{path} as the new save_path for this combiner')
             combiner.save_path = path
         return combiner
Example #7
 def _assert_rectangles_eq(self, expected, rect_path, export_rects, binary=False):
     for (i, r) in enumerate(export_rects):
         piece_path = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
         with hl.current_backend().fs.open(piece_path, mode='rb' if binary else 'r') as file:
             expected_rect = expected[r[0]:r[1], r[2]:r[3]]
             if binary:
                 actual_rect = np.reshape(
                     np.frombuffer(file.read()),
                     (r[1] - r[0], r[3] - r[2]))
             else:
                 actual_rect = np.loadtxt(file, ndmin=2)
             self._assert_eq(expected_rect, actual_rect)
Example #8

    def is_resource_available(self) -> bool:
        """
        Check if this resource is available from the selected source.

        :return: True if the resource is available.
        """
        path = self.path

        # Hail Tables, MatrixTables, and BlockMatrices are directories.
        # For those, check for the existence of the _SUCCESS object.
        path_to_test = f"{path}/_SUCCESS" if path.endswith((".ht", ".mt", ".bm")) else path

        return hl.current_backend().fs.exists(path_to_test)
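Hail writes Tables (.ht), MatrixTables (.mt), and BlockMatrices (.bm) as directories in which a _SUCCESS object marks a completed write; a standalone sketch of the same check (the helper name is hypothetical):

import hail as hl

def hail_write_succeeded(path: str) -> bool:
    # Directory-backed formats get the _SUCCESS check; plain files need only exist.
    if path.endswith(('.ht', '.mt', '.bm')):
        path = f'{path}/_SUCCESS'
    return hl.current_backend().fs.exists(path)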
Example #9
    def test_backward_compatability(self):
        import os

        def backward_compatible_same(current, old):
            if isinstance(current, hl.Table):
                current = current.select_globals(*old.globals)
                current = current.select(*old.row_value)
            else:
                current = current.select_globals(*old.globals)
                current = current.select_rows(*old.row_value)
                current = current.select_cols(*old.col_value)
                current = current.select_entries(*old.entry)
            return current._same(old)

        all_values_table, all_values_matrix_table = create_all_values_datasets()

        resource_dir = resource('backward_compatability')
        fs = hl.current_backend().fs
        versions = [os.path.basename(x['path']) for x in fs.ls(resource_dir)]

        n = 0
        for v in versions:
            table_dir = os.path.join(resource_dir, v, 'table')
            i = 0
            f = os.path.join(table_dir, '{}.ht'.format(i))
            while fs.exists(f):
                ds = hl.read_table(f)
                assert backward_compatible_same(all_values_table, ds)
                i += 1
                f = os.path.join(table_dir, '{}.ht'.format(i))
                n += 1

            matrix_table_dir = os.path.join(resource_dir, v, 'matrix_table')
            i = 0
            f = os.path.join(matrix_table_dir, '{}.hmt'.format(i))
            while fs.exists(f):
                ds = hl.read_matrix_table(f)
                assert backward_compatible_same(all_values_matrix_table, ds)
                i += 1
                f = os.path.join(matrix_table_dir, '{}.hmt'.format(i))
                n += 1

        assert n == 72
Example #10

 def test_block_matrices_tofiles(self):
     data = [np.random.rand(11 * 12), np.random.rand(5 * 17)]
     arrs = [data[0].reshape((11, 12)), data[1].reshape((5, 17))]
     bms = [
         hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4),
         hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8),
     ]
     with hl.TemporaryDirectory() as prefix:
         hl.experimental.block_matrices_tofiles(bms, f'{prefix}/files')
         for i in range(len(bms)):
             a = data[i]
             a2 = np.frombuffer(hl.current_backend().fs.open(
                 f'{prefix}/files/{i}', mode='rb').read())
             self.assertTrue(np.array_equal(a, a2))
Example #11

 def maybe_load_from_saved_path(save_path: str) -> Optional[VariantDatasetCombiner]:
     if force:
         return None
     fs = hl.current_backend().fs
     if fs.exists(save_path):
         try:
             combiner = load_combiner(save_path)
             warning(
                 f'found existing combiner plan at {save_path}, using it')
             # These values are serialized but are not part of the hash used for an
             # autogenerated name, so we overwrite them here: users need to be able
             # to change them when resuming a combine (a common reason to resume is
             # a failure caused by too large a branch factor).
             combiner.branch_factor = branch_factor
             combiner.target_records = target_records
             combiner.gvcf_batch_size = batch_size
             return combiner
         except (ValueError, TypeError, OSError, KeyError):
             warning(
                 f'file exists at {save_path}, but it is not a valid combiner plan, overwriting'
             )
     return None
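For context, this helper is what makes the VDS combiner resumable; a hedged sketch of the user-facing flow it supports, where the constructor arguments are an assumption based on hl.vds.new_combiner and the paths are placeholders:

import hail as hl

# new_combiner reuses a valid saved plan when one exists (overriding the
# tunables noted above) and otherwise builds and saves a fresh plan.
combiner = hl.vds.new_combiner(
    output_path='gs://my-bucket/dataset.vds',
    temp_path='gs://my-bucket/tmp',
    gvcf_paths=['gs://my-bucket/sample1.g.vcf.gz'],
    reference_genome='GRCh38',
)
combiner.run()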
Example #12
 def test_top_level_functions_are_do_not_error(self):
     hl.current_backend()
     hl.debug_info()
Example #13
 def setupAnnotationDBTests(cls):
     startTestHailContext()
     backend = hl.current_backend()
     if isinstance(backend, ServiceBackend):
         backend.batch_attributes = dict(name='setupAnnotationDBTests')
     t = hl.utils.range_table(10)
     t = t.key_by(locus=hl.locus('1', t.idx + 1))
     t = t.annotate(annotation=hl.str(t.idx))
     cls.tempdir_manager = hl.TemporaryDirectory()
     d = cls.tempdir_manager.__enter__()
     fname = d + '/f.mt'
     t.write(fname)
     if isinstance(backend, ServiceBackend):
         backend.batch_attributes = dict()
     cls.db_json = {
         'unique_dataset': {
             'description': 'now with unique rows!',
             'url': 'https://example.com',
             'annotation_db': {'key_properties': ['unique']},
             'versions': [{
                 'url': {
                     "aws": {"eu": fname, "us": fname},
                     "gcp": {"eu": fname, "us": fname},
                 },
                 'version': 'v1',
                 'reference_genome': 'GRCh37',
             }],
         },
         'nonunique_dataset': {
             'description': 'non-unique rows :(',
             'url': 'https://example.net',
             'annotation_db': {'key_properties': []},
             'versions': [{
                 'url': {
                     "aws": {"eu": fname, "us": fname},
                     "gcp": {"eu": fname, "us": fname},
                 },
                 'version': 'v1',
                 'reference_genome': 'GRCh37',
             }],
         },
     }
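The dict above mirrors the annotation database's config schema: per-dataset metadata plus per-cloud, per-region URLs. A hedged sketch of how such a config might be consumed in these tests; the DB constructor arguments are an assumption:

db = hl.experimental.DB(region='us', cloud='gcp', config=cls.db_json)
t = hl.utils.range_table(10)
t = t.key_by(locus=hl.locus('1', t.idx + 1))
annotated = db.annotate_rows_db(t, 'unique_dataset')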
Example #15
def test_count_range():
    assert isinstance(hl.current_backend(), ServiceBackend)
    assert hl.utils.range_table(1000)._force_count() == 1000
Example #16
    def test_remove_and_rmtree(self, prefix: Optional[str] = None):
        if prefix is None:
            prefix = self.remote_tmpdir

        fs = hl.current_backend().fs

        dir = f'{prefix}foo/'
        subdir1 = f'{dir}foo/'
        subdir1subdir1 = f'{subdir1}foo/'
        subdir1subdir2 = f'{subdir1}bar/'
        subdir1subdir3 = f'{subdir1}baz/'

        def touch(filename):
            with fs.open(filename, 'w') as fobj:
                fobj.write('hello world')

        fs.mkdir(dir)
        touch(f'{dir}a')
        touch(f'{dir}b')

        fs.mkdir(subdir1)
        touch(f'{subdir1}a')
        fs.mkdir(subdir1subdir1)
        touch(f'{subdir1subdir1}a')
        fs.mkdir(subdir1subdir2)
        touch(f'{subdir1subdir2}a')
        fs.mkdir(subdir1subdir3)
        touch(f'{subdir1subdir3}a')

        try:
            fs.remove(subdir1subdir2)
        except (FileNotFoundError, IsADirectoryError):
            pass
        except FatalError as err:
            java_nio_error_message = 'DirectoryNotEmptyException: Cannot delete a non-empty directory'
            hadoop_error_message = f'Directory {subdir1subdir2.rstrip("/")} is not empty'
            assert java_nio_error_message in err.args[0] or hadoop_error_message in err.args[0]
        else:
            assert False

        fs.remove(f'{subdir1subdir2}a')

        assert fs.exists(dir)
        assert fs.exists(f'{dir}a')
        assert fs.exists(f'{dir}b')
        assert fs.exists(subdir1)
        assert fs.exists(f'{subdir1}a')
        assert fs.exists(subdir1subdir1)
        assert fs.exists(f'{subdir1subdir1}a')
        # subdir1subdir2: will exist in cloud, but not local, so do not test for it
        assert not fs.exists(f'{subdir1subdir2}a')
        assert fs.exists(subdir1subdir3)
        assert fs.exists(f'{subdir1subdir3}a')

        fs.rmtree(subdir1subdir1)

        assert fs.exists(dir)
        assert fs.exists(f'{dir}a')
        assert fs.exists(f'{dir}b')
        assert fs.exists(subdir1)
        assert fs.exists(f'{subdir1}a')
        assert not fs.exists(subdir1subdir1)
        assert not fs.exists(f'{subdir1subdir1}a')
        # subdir1subdir2: will exist in cloud, but not local, so do not test for it
        assert not fs.exists(f'{subdir1subdir2}a')
        assert fs.exists(subdir1subdir3)
        assert fs.exists(f'{subdir1subdir3}a')

        fs.rmtree(subdir1)

        assert fs.exists(dir)
        assert fs.exists(f'{dir}a')
        assert fs.exists(f'{dir}b')
        assert not fs.exists(subdir1)
        assert not fs.exists(f'{subdir1}a')
        assert not fs.exists(subdir1subdir1)
        assert not fs.exists(f'{subdir1subdir1}a')
        assert not fs.exists(subdir1subdir2)
        assert not fs.exists(f'{subdir1subdir2}a')
        assert not fs.exists(subdir1subdir3)
        assert not fs.exists(f'{subdir1subdir3}a')
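As the test shows, fs.remove deletes a single object and fails, in backend-dependent ways, on a directory, while fs.rmtree deletes recursively; a small hedged helper capturing that split (the function name is hypothetical):

import hail as hl

def force_delete(path: str):
    # Directories (which may exist only implicitly in cloud object stores)
    # need rmtree; plain objects are removed directly.
    fs = hl.current_backend().fs
    if fs.is_dir(path):
        fs.rmtree(path)
    elif fs.exists(path):
        fs.remove(path)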
Example #17
    def test_subdirs(self, prefix: Optional[str] = None):
        if prefix is None:
            prefix = self.remote_tmpdir

        fs = hl.current_backend().fs

        dir = f'{prefix}foo/'
        subdir1 = f'{dir}foo/'
        subdir1subdir1 = f'{subdir1}foo/'
        subdir1subdir2 = f'{subdir1}bar/'
        subdir1subdir3 = f'{subdir1}baz/'
        subdir1subdir4_empty = f'{subdir1}qux/'
        subdir2 = f'{dir}bar/'
        subdir3 = f'{dir}baz/'
        subdir4_empty = f'{dir}qux/'

        def touch(filename):
            with fs.open(filename, 'w') as fobj:
                fobj.write('hello world')

        fs.mkdir(dir)
        touch(f'{dir}a')
        touch(f'{dir}b')

        fs.mkdir(subdir1)
        fs.mkdir(subdir1subdir1)
        fs.mkdir(subdir1subdir2)
        fs.mkdir(subdir1subdir3)
        fs.mkdir(subdir1subdir4_empty)
        fs.mkdir(subdir2)
        fs.mkdir(subdir3)
        fs.mkdir(subdir4_empty)

        for subdir in [dir, subdir1, subdir2, subdir3, subdir1subdir1, subdir1subdir2, subdir1subdir3]:
            for i in range(30):
                touch(f'{subdir}a{i:02}')

        assert fs.is_dir(dir)
        assert fs.is_dir(subdir1)
        assert fs.is_dir(subdir1subdir1)
        assert fs.is_dir(subdir1subdir2)
        assert fs.is_dir(subdir1subdir3)
        # subdir1subdir4_empty: in cloud fses, empty dirs do not exist and thus are not dirs
        assert fs.is_dir(subdir2)
        assert fs.is_dir(subdir3)
        # subdir4_empty: in cloud fses, empty dirs do not exist and thus are not dirs

        fs.rmtree(subdir1subdir2)

        assert fs.is_dir(dir)
        assert fs.is_file(f'{dir}a')
        assert fs.is_file(f'{dir}b')

        assert fs.is_dir(subdir1)
        assert fs.is_file(f'{subdir1}a00')

        assert fs.is_dir(subdir1subdir1)
        assert fs.is_file(f'{subdir1subdir1}a00')

        assert not fs.is_dir(subdir1subdir2)
        assert not fs.is_file(f'{subdir1subdir2}a00')

        assert fs.is_dir(subdir1subdir3)
        assert fs.is_file(f'{subdir1subdir3}a00')

        assert fs.is_dir(subdir2)
        assert fs.is_file(f'{subdir2}a00')
        assert fs.is_dir(subdir3)
        assert fs.is_file(f'{subdir3}a00')

        fs.rmtree(dir)

        assert not fs.is_dir(dir)