Python Env.backend示例，hail.utils.java.Env.backend Python示例

示例#1

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def from_fasta_file(cls, name, fasta_file, index_file,
                        x_contigs=[], y_contigs=[], mt_contigs=[], par=[]):
        """Create reference genome from a FASTA file.

        The backend is asked to build the reference from the FASTA/index
        pair; the resulting configuration is then fetched back by name and
        wrapped in a :class:`.ReferenceGenome` flagged as having a sequence.

        Parameters
        ----------
        name : :obj:`str`
            Name for new reference genome.
        fasta_file : :obj:`str`
            Path to FASTA file. Can be compressed (GZIP) or uncompressed.
        index_file : :obj:`str`
            Path to FASTA index file. Must be uncompressed.
        x_contigs : :obj:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as X chromosomes.
        y_contigs : :obj:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as Y chromosomes.
        mt_contigs : :obj:`str` or :obj:`list` of :obj:`str`
            Contigs to be treated as mitochondrial DNA.
        par : :obj:`list` of :obj:`tuple` of (str, int, int)
            List of tuples with (contig, start, end). Each tuple is rendered
            as ``"contig:start-end"`` before being passed to the backend.

        Returns
        -------
        :class:`.ReferenceGenome`
        """
        # The list defaults in the signature are only read here, never mutated.
        par_intervals = [f"{contig}:{start}-{end}" for contig, start, end in par]
        Env.backend().from_fasta_file(
            name, fasta_file, index_file,
            x_contigs, y_contigs, mt_contigs, par_intervals)

        # Build the Python-side object from the configuration the backend now holds.
        config = Env.backend().get_reference(name)
        reference = ReferenceGenome._from_config(config, _builtin=True)
        reference._has_sequence = True
        return reference

示例#2

0

显示文件

def write_block_matrices(bms: List[BlockMatrix],
                         path_prefix: str,
                         overwrite: bool = False,
                         force_row_major: bool = False,
                         stage_locally: bool = False):
    """Write a sequence of block matrices to disk in the BlockMatrix.write format.

    A single multi-writer is configured from the arguments and executed on
    the backend against the IR of every matrix in ``bms``.

    :param bms: :obj:`list` of :class:`BlockMatrix`
        Block matrices to write to disk.
    :param path_prefix: :obj:`str`
        Prefix of path to write the block matrices to.
    :param overwrite: :obj:`bool`
        If true, overwrite any files with the same name as the block matrices being generated.
    :param force_row_major: :obj:`bool`
        If ``True``, transform blocks in column-major format
        to row-major format before writing.
        If ``False``, write blocks in their current format.
    :param stage_locally: :obj:`bool`
        If ``True``, major output will be written to temporary local storage
        before being copied to its final location.
    """
    backend = Env.backend()
    multi_writer = BlockMatrixNativeMultiWriter(
        path_prefix, overwrite, force_row_major, stage_locally)
    block_irs = [matrix._bmir for matrix in bms]
    backend.execute(BlockMatrixMultiWrite(block_irs, multi_writer))

示例#3

0

显示文件

文件： base_ir.py 项目： theferrit32/hail

 def __del__(self):
     # Ask the backend to remove the IR vector registered under ``self.jid``
     # once this Python object is being finalized.
     try:
         remove_ir_vector = Env.backend()._jhc.pyRemoveIrVector
         remove_ir_vector(self.jid)
     except Exception:
         # Deliberately best-effort: there is only so much we can do if
         # removing the unused IR fails, especially since this is often
         # invoked during interpreter shutdown.
         pass

示例#4

0

显示文件

文件： expression_utils.py 项目： zietzm/hail

def eval_timed(expression):
    """Evaluate a Hail expression and report per-stage timings.

    Parameters
    ----------
    expression : :class:`.Expression`
        Any expression, or a Python value that can be implicitly interpreted as an expression.

    Returns
    -------
    (Any, dict)
        Result of evaluating `expression` and a dictionary of the timings

    Raises
    ------
    ExpressionException
        If a source-less expression's ``dtype`` disagrees with the type of
        its IR.
    """
    from hail.utils.java import Env

    analyze('eval_timed', expression, Indices(expression._indices.source))

    source = expression._indices.source
    if source is not None:
        # The expression refers to a source object: stash it in a uniquely
        # named global field and evaluate the IR that reads it back.
        uid = Env.get_uid()
        with_global = source.select_globals(**{uid: expression})
        ir = with_global.index_globals()[uid]._ir
        return Env.backend().execute(ir, True)

    # Source-less expression: sanity-check the IR type before executing.
    ir_type = expression._ir.typ
    expression_type = expression.dtype
    if ir_type != expression_type:
        raise ExpressionException(
            f'Expression type and IR type differed: \n{ir_type}\n vs \n{expression_type}'
        )
    return Env.backend().execute(expression._ir, True)

示例#5

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def __init__(self, name, contigs, lengths, x_contigs=[], y_contigs=[], mt_contigs=[], par=[], _builtin=False):
        # Build the backend-facing configuration dict, record the instance in
        # the class-level registry, and (unless ``_builtin``) register the
        # reference with the backend.
        super(ReferenceGenome, self).__init__()

        contigs, x_contigs, y_contigs, mt_contigs = [
            wrap_to_list(value)
            for value in (contigs, x_contigs, y_contigs, mt_contigs)
        ]

        # NOTE: contig entries follow the iteration order of ``lengths``,
        # not the order of ``contigs``.
        contig_entries = [{'name': contig, 'length': length}
                          for contig, length in lengths.items()]
        par_entries = [
            {'start': {'contig': contig, 'position': start},
             'end': {'contig': contig, 'position': end}}
            for contig, start, end in par
        ]
        self._config = {
            'name': name,
            'contigs': contig_entries,
            'xContigs': x_contigs,
            'yContigs': y_contigs,
            'mtContigs': mt_contigs,
            'par': par_entries,
        }

        self._contigs = contigs
        self._lengths = lengths
        self._par_tuple = par
        # The loci are constructed against this (partially initialised) instance.
        self._par = [
            hl.Interval(hl.Locus(contig, start, self), hl.Locus(contig, end, self))
            for contig, start, end in par
        ]

        ReferenceGenome._references[name] = self

        # References flagged as built-in are not added to the backend here.
        if not _builtin:
            Env.backend().add_reference(self._config)

        hl.ir.register_reference_genome_functions(name)

        self._has_sequence = False
        self._liftovers = set()

示例#6

0

显示文件

文件： expression_utils.py 项目： jigold/hail

def eval_timed(expression):
    """Evaluate a Hail expression, returning its value together with timings.

    Parameters
    ----------
    expression : :class:`.Expression`
        Any expression, or a Python value that can be implicitly interpreted as an expression.

    Returns
    -------
    (Any, dict)
        Result of evaluating `expression` and a dictionary of the timings

    Raises
    ------
    ExpressionException
        If a source-less expression's ``dtype`` disagrees with the type of
        its IR.
    """
    from hail.utils.java import Env

    analyze('eval_timed', expression, Indices(expression._indices.source))

    source = expression._indices.source
    if source is None:
        # No source object involved: verify the IR's type, then run it directly.
        ir_type = expression._ir.typ
        expression_type = expression.dtype
        if ir_type != expression_type:
            raise ExpressionException(f'Expression type and IR type differed: \n{ir_type}\n vs \n{expression_type}')
        ir = expression._ir
    else:
        # Store the expression under a unique global field of its source and
        # evaluate the IR that looks that field up.
        uid = Env.get_uid()
        selected = source.select_globals(**{uid: expression})
        ir = selected.index_globals()[uid]._ir

    return Env.backend().execute(ir, True)

示例#7

0

显示文件

文件： reference_genome.py 项目： zietzm/hail

    def __init__(self, name, contigs, lengths,
                 x_contigs=[], y_contigs=[], mt_contigs=[], par=[],
                 _builtin=False):
        # Build the backend-facing configuration dict, record the instance in
        # the class-level registry, and (unless ``_builtin``) register the
        # reference with the backend.
        super(ReferenceGenome, self).__init__()

        contigs, x_contigs, y_contigs, mt_contigs = [
            wrap_to_list(value)
            for value in (contigs, x_contigs, y_contigs, mt_contigs)
        ]

        # NOTE: contig entries follow the iteration order of ``lengths``,
        # not the order of ``contigs``.
        contig_entries = [{'name': contig, 'length': length}
                          for contig, length in lengths.items()]
        par_entries = [
            {'start': {'contig': contig, 'position': start},
             'end': {'contig': contig, 'position': end}}
            for contig, start, end in par
        ]
        self._config = {
            'name': name,
            'contigs': contig_entries,
            'xContigs': x_contigs,
            'yContigs': y_contigs,
            'mtContigs': mt_contigs,
            'par': par_entries,
        }

        self._contigs = contigs
        self._lengths = lengths
        self._par_tuple = par
        # The loci are constructed against this (partially initialised) instance.
        self._par = [
            hl.Interval(hl.Locus(contig, start, self), hl.Locus(contig, end, self))
            for contig, start, end in par
        ]
        self._global_positions = None

        ReferenceGenome._references[name] = self

        # References flagged as built-in are not added to the backend here.
        if not _builtin:
            Env.backend().add_reference(self._config)

        hl.ir.register_reference_genome_functions(name)

        self._sequence_files = None
        self._liftovers = dict()

示例#8

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def remove_sequence(self):
        """Remove the reference sequence.

        Clears the local ``_has_sequence`` flag first, then asks the backend
        to remove the sequence registered under this reference's name.
        Nothing is returned.
        """
        self._has_sequence = False
        backend = Env.backend()
        backend.remove_sequence(self.name)

示例#9

0

显示文件

def export_block_matrices(bms: List[BlockMatrix],
                          prefix: str,
                          overwrite: bool = False,
                          delimiter: str = '\t',
                          header: Optional[str] = None,
                          add_index: bool = False):
    """Export a sequence of block matrices through a text multi-writer.

    A ``BlockMatrixTextMultiWriter`` is configured from the arguments and
    executed on the backend against the IR of every matrix in ``bms``.

    :param bms: :obj:`list` of :class:`BlockMatrix`
        Block matrices to export.
    :param prefix: :obj:`str`
        Passed to the writer as its first argument; presumably the output
        path prefix (confirm against the writer).
    :param overwrite: :obj:`bool`
        Forwarded to the writer unchanged.
    :param delimiter: :obj:`str`
        Forwarded to the writer unchanged; defaults to a tab character.
    :param header: :obj:`str` or :obj:`None`
        Forwarded to the writer unchanged.
    :param add_index: :obj:`bool`
        Forwarded to the writer unchanged.
    """
    backend = Env.backend()
    text_writer = BlockMatrixTextMultiWriter(
        prefix, overwrite, delimiter, header, add_index)
    block_irs = [matrix._bmir for matrix in bms]
    backend.execute(BlockMatrixMultiWrite(block_irs, text_writer))

示例#10

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def remove_liftover(self, dest_reference_genome):
        """Remove liftover to `dest_reference_genome`.

        Does nothing when no liftover to the destination is recorded on this
        reference; otherwise the destination is dropped from the local record
        and the backend is told to remove the liftover as well.

        Parameters
        ----------
        dest_reference_genome : :obj:`str` or :class:`.ReferenceGenome`
            Destination reference. The body reads ``.name`` from it, so a
            plain string must already have been converted before this point
            (presumably by a decorator not shown here).
        """
        dest_name = dest_reference_genome.name
        if dest_name not in self._liftovers:
            return

        self._liftovers.remove(dest_name)
        Env.backend().remove_liftover(self.name, dest_name)

示例#11

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def add_sequence(self, fasta_file, index_file=None):
        """Load the reference sequence from a FASTA file.

        Examples
        --------
        Access the GRCh37 reference genome using :func:`.get_reference`:

        >>> rg = hl.get_reference('GRCh37') # doctest: +SKIP

        Add a sequence file:

        >>> rg.add_sequence('gs://hail-common/references/human_g1k_v37.fasta.gz',
        ...                 'gs://hail-common/references/human_g1k_v37.fasta.fai') # doctest: +SKIP

        Add a sequence file with the default index location:

        >>> rg.add_sequence('gs://hail-common/references/human_g1k_v37.fasta.gz') # doctest: +SKIP


        Notes
        -----
        This method can only be run once per reference genome. Use
        :meth:`~has_sequence` to test whether a sequence is loaded.

        FASTA and index files are hosted on google cloud for some of Hail's built-in
        references:

        **GRCh37**

        - FASTA file: ``gs://hail-common/references/human_g1k_v37.fasta.gz``
        - Index file: ``gs://hail-common/references/human_g1k_v37.fasta.fai``

        **GRCh38**

        - FASTA file: ``gs://hail-common/references/Homo_sapiens_assembly38.fasta.gz``
        - Index file: ``gs://hail-common/references/Homo_sapiens_assembly38.fasta.fai``

        Public download links are available
        `here <https://console.cloud.google.com/storage/browser/hail-common/references/>`__.

        Parameters
        ----------
        fasta_file : :obj:`str`
            Path to FASTA file. Can be compressed (GZIP) or uncompressed.
        index_file : :obj:`None` or :obj:`str`
            Path to FASTA index file. Must be uncompressed. If `None`, the
            final extension of `fasta_file` (everything from its last ``.``
            to the end) is replaced with ``.fai``, e.g. ``x.fasta.gz``
            becomes ``x.fasta.fai``.
        """
        if index_file is None:
            # Raw string: ``\.`` is not a valid escape sequence in an ordinary
            # string literal and triggers a warning on recent Python versions.
            index_file = re.sub(r'\.[^.]*$', '.fai', fasta_file)
        Env.backend().add_sequence(self.name, fasta_file, index_file)
        # Only flag the sequence as loaded once the backend call has returned.
        self._has_sequence = True

示例#12

0

显示文件

文件： reference_genome.py 项目： joonan30/hail

    def add_liftover(self, chain_file, dest_reference_genome):
        """Register a chain file for liftover.

        Examples
        --------
        Access GRCh37 and GRCh38 using :func:`.get_reference`:

        >>> rg37 = hl.get_reference('GRCh37') # doctest: +SKIP
        >>> rg38 = hl.get_reference('GRCh38') # doctest: +SKIP

        Add a chain file from 37 to 38:

        >>> rg37.add_liftover('gs://hail-common/references/grch37_to_grch38.over.chain.gz', rg38) # doctest: +SKIP

        Notes
        -----
        A liftover to a given destination can only be registered once on a
        reference genome. Use :meth:`~has_liftover` to test whether a chain
        file has been registered.

        The chain file format is described
        `here <https://genome.ucsc.edu/goldenpath/help/chain.html>`__.

        Chain files are hosted on google cloud for some of Hail's built-in
        references:

        **GRCh37 to GRCh38**
        gs://hail-common/references/grch37_to_grch38.over.chain.gz

        **GRCh38 to GRCh37**
        gs://hail-common/references/grch38_to_grch37.over.chain.gz

        Public download links are available
        `here <https://console.cloud.google.com/storage/browser/hail-common/references/>`__.

        Parameters
        ----------
        chain_file : :obj:`str`
            Path to chain file. Can be compressed (GZIP) or uncompressed.
        dest_reference_genome : :obj:`str` or :class:`.ReferenceGenome`
            Reference genome to convert to. The body reads ``.name`` from it,
            so a plain string must already have been converted before this
            point (presumably by a decorator not shown here).

        Raises
        ------
        KeyError
            If a liftover to `dest_reference_genome` is already recorded on
            this reference genome. Nothing is sent to the backend in that
            case.
        """
        dest_name = dest_reference_genome.name

        # Validate before touching the backend so that a rejected call cannot
        # leave the backend holding a chain file that the Python-side record
        # does not know about.
        if dest_name in self._liftovers:
            raise KeyError(
                f"Liftover already exists from {self.name} to {dest_name}."
            )

        Env.backend().add_liftover(self.name, chain_file, dest_name)
        # Record the liftover only after the backend accepted it.
        self._liftovers[dest_name] = chain_file
        hl.ir.register_liftover_functions(self.name, dest_name)

示例#13

0

显示文件

文件： table_ir.py 项目： jigold/hail

 def _compute_type(self):
     # Filtering nodes keep the child's type unchanged; for the other known
     # configurations the backend is asked for the resulting table type.
     name = self.config['name']
     if name in ('TableFilterPartitions', 'TableFilterIntervals'):
         self._type = self.child.typ
         return

     # Any other configuration name is unexpected here.
     assert name in ('VEP', 'Nirvana'), name
     self._type = Env.backend().table_type(self)

示例#14

0

显示文件

文件： expression_utils.py 项目： bcajes/hail

def eval_typed(expression):
    """Evaluate a Hail expression, returning the result and the type of the result.

    This method is extremely useful for learning about Hail expressions and understanding
    how to compose them.

    The expression must have no indices, but can refer to the globals
    of a :class:`.hail.Table` or :class:`.hail.MatrixTable`.

    Examples
    --------
    Evaluate a conditional:

    >>> x = 6
    >>> hl.eval_typed(hl.cond(x % 2 == 0, 'Even', 'Odd'))
    ('Even', dtype('str'))

    Parameters
    ----------
    expression : :class:`.Expression`
        Any expression, or a Python value that can be implicitly interpreted as an expression.

    Returns
    -------
    (any, :class:`.HailType`)
        Result of evaluating `expression`, and its type.

    """
    analyze('eval_typed', expression, Indices(expression._indices.source))

    if expression._indices.source is not None:
        # Expressions tied to a source object are collected through it.
        return expression.collect()[0], expression.dtype

    # Source-less expressions are executed directly on the backend.
    value = Env.backend().execute(expression._ir)
    return value, expression.dtype

示例#15

0

显示文件

def compute_and_annotate_ld_score(ht, r2_adj, radius, out_name, overwrite):
    """Compute LD scores from ``r2_adj`` and write ``ht`` annotated with them.

    ``r2_adj`` is restricted to the locus windows of ``ht``, its row sums and
    column sums are added together (plus one), and the result is round-tripped
    through temporary files into a table keyed by ``idx``. That table is
    joined onto ``ht`` via ``ht.new_idx``, and rows with a defined
    ``ld_score`` are written to ``out_name``.

    :param ht: table providing the ``locus`` and ``new_idx`` fields used here.
    :param r2_adj: block matrix of adjusted r^2 values (accessed through
        ``_jbm``, so presumably a ``BlockMatrix``).
    :param radius: window radius passed to ``locus_windows``.
    :param out_name: destination path for the annotated table.
    :param overwrite: forwarded to the final ``write`` call.
    """
    windows = hl.linalg.utils.locus_windows(ht.locus, radius, _localize=False)

    # Lifted directly from https://github.com/hail-is/hail/blob/555e02d6c792263db2c3ed97db8002b489e2dacb/hail/python/hail/methods/statgen.py#L2595
    # for the time being, until efficient BlockMatrix filtering gets an easier interface
    # This is required, as the squaring/multiplication densifies, so this re-sparsifies.
    windows_jir = Env.backend()._to_java_ir(windows._ir)
    r2_adj = BlockMatrix._from_java(
        r2_adj._jbm.filterRowIntervalsIR(windows_jir, False))

    l2 = r2_adj.sum(axis=0).T + r2_adj.sum(axis=1) + 1

    # Round-trip through disk: block matrix -> TSV -> table.
    l2_bm_tmp = new_temp_file()
    l2_tsv_tmp = new_temp_file()
    l2.write(l2_bm_tmp, force_row_major=True)
    BlockMatrix.export(l2_bm_tmp, l2_tsv_tmp)

    scores = hl.import_table(l2_tsv_tmp, no_header=True, impute=True)
    scores = scores.add_index().rename({'f0': 'ld_score'}).key_by('idx')

    annotated = ht.annotate(**scores[ht.new_idx]).select_globals()
    annotated.filter(hl.is_defined(annotated.ld_score)).write(out_name, overwrite)

示例#16

0

显示文件

文件： context.py 项目： tpoterba/hail

def spark_context():
    """Return the Spark context held by the current backend.

    Returns
    -------
    :class:`pyspark.SparkContext`
        The backend's ``sc`` attribute.
    """
    backend = Env.backend()
    return backend.sc

示例#17

0

显示文件

文件： reference_genome.py 项目： jigold/hail

    def add_liftover(self, chain_file, dest_reference_genome):
        """Register a chain file for liftover.

        Examples
        --------
        Access GRCh37 and GRCh38 using :func:`.get_reference`:

        >>> rg37 = hl.get_reference('GRCh37') # doctest: +SKIP
        >>> rg38 = hl.get_reference('GRCh38') # doctest: +SKIP

        Add a chain file from 37 to 38:

        >>> rg37.add_liftover('gs://hail-common/references/grch37_to_grch38.over.chain.gz', rg38) # doctest: +SKIP

        Notes
        -----
        This method can only be run once per reference genome. Use
        :meth:`~has_liftover` to test whether a chain file has been registered.

        The chain file format is described
        `here <https://genome.ucsc.edu/goldenpath/help/chain.html>`__.

        Chain files are hosted on google cloud for some of Hail's built-in
        references:

        **GRCh37 to GRCh38**
        gs://hail-common/references/grch37_to_grch38.over.chain.gz

        **GRCh38 to GRCh37**
        gs://hail-common/references/grch38_to_grch37.over.chain.gz

        Public download links are available
        `here <https://console.cloud.google.com/storage/browser/hail-common/references/>`__.

        Parameters
        ----------
        chain_file : :obj:`str`
            Path to chain file. Can be compressed (GZIP) or uncompressed.
        dest_reference_genome : :obj:`str` or :class:`.ReferenceGenome`
            Reference genome to convert to. The body reads ``.name`` from it,
            so a plain string must already have been converted before this
            point (presumably by a decorator not shown here).
        """
        source_name = self.name
        dest_name = dest_reference_genome.name

        # Backend first, then the local record, then the liftover functions.
        Env.backend().add_liftover(source_name, chain_file, dest_name)
        self._liftovers.add(dest_name)
        hl.ir.register_liftover_functions(source_name, dest_name)

示例#18

0

显示文件

文件： function.py 项目： TileDB-Inc/hail

def define_function(f, *param_types, _name=None, type_args=()):
    """Register ``f`` as an IR function on the backend and return a wrapper.

    ``f`` is called once with placeholder expressions (one per entry of
    ``param_types``) to obtain the function body; that body is registered
    with the backend under ``_name`` (or a fresh unique id when ``_name`` is
    ``None``). The returned ``Function`` wraps a callable that builds an
    ``Apply`` node referring to the registered function.

    :param f: callable producing the body expression from parameter expressions.
    :param param_types: type of each parameter, in order.
    :param _name: optional explicit name for the registered function.
    :param type_args: forwarded to the backend registration and to ``Apply``.
    :return: ``Function`` built from the wrapper, the parameter types, the
        return type, the registered name and ``type_args``.
    """
    mname = Env.get_uid() if _name is None else _name
    param_names = [Env.get_uid(mname) for _ in param_types]

    # Trace the user function on reference expressions to obtain its body.
    placeholders = [construct_expr(Ref(param_name), param_type)
                    for param_name, param_type in zip(param_names, param_types)]
    body = f(*placeholders)
    ret_type = body.dtype

    Env.backend().register_ir_function(
        mname, type_args, param_names, param_types, ret_type, body)

    # From here on ``f`` is rebound to the wrapper; the original callable is
    # no longer needed.
    @typecheck(args=expr_any)
    def f(*args):
        indices, aggregations = unify_all(*args)
        arg_irs = [arg._ir for arg in args]
        call_ir = Apply(mname, ret_type, *arg_irs, type_args=type_args)
        return construct_expr(call_ir, ret_type, indices, aggregations)

    return Function(f, param_types, ret_type, mname, type_args)

示例#19

0

显示文件

def generate_ld_scores_from_ld_matrix(pop_data,
                                      data_type,
                                      min_frequency=0.01,
                                      call_rate_cutoff=0.8,
                                      adj: bool = False,
                                      radius: int = 1000000,
                                      overwrite=False):
    """Compute and write LD scores for each population in ``pop_data``.

    For every population (except ``nfe``, ``fin`` and ``asj``, which are
    skipped), the LD index table is filtered by allele frequency and call
    rate, the matching LD matrix is read, squared and adjusted, restricted to
    locus windows, and reduced to per-variant scores that are written to
    ``ld_scores_path(data_type, pop, adj)``.

    :param pop_data: anything accepted by ``dict()`` whose values map a
        population name to ``n`` (presumably that population's sample count).
    :param data_type: forwarded to the path helpers.
    :param min_frequency: variants are kept when ``AF`` lies within
        ``[min_frequency, 1 - min_frequency]``.
    :param call_rate_cutoff: variants are kept when ``AN / n`` is at least
        twice this value.
    :param adj: forwarded to the path helpers.
    :param radius: window radius passed to ``locus_windows``.
    :param overwrite: forwarded to the final ``write`` call.
    """
    # This function required a decent number of high-mem machines (with an SSD for good measure) to complete the AFR
    # For the rest, on 20 n1-standard-8's, 1h15m to export block matrix, 15 mins to compute LD scores per population (~$150 total)
    skipped_pops = ('nfe', 'fin', 'asj')
    for pops in dict(pop_data).values():
        for pop, n in pops.items():
            if pop in skipped_pops:
                continue

            ht = hl.read_table(ld_index_path(data_type, pop, adj=adj))
            passes_filters = ((ht.pop_freq.AF >= min_frequency)
                              & (ht.pop_freq.AF <= 1 - min_frequency)
                              & (ht.pop_freq.AN / n >= 2 * call_rate_cutoff))
            ht = ht.filter(passes_filters).add_index(name='new_idx')

            indices = ht.idx.collect()

            matrix_path = ld_matrix_path(data_type,
                                         pop,
                                         min_frequency >= COMMON_FREQ,
                                         adj=adj)
            r2 = BlockMatrix.read(matrix_path)
            r2 = r2.filter(indices, indices) ** 2
            r2_adj = ((n - 1.0) / (n - 2.0)) * r2 - (1.0 / (n - 2.0))

            windows = hl.linalg.utils.locus_windows(ht.locus,
                                                    radius,
                                                    _localize=False)

            # Lifted directly from https://github.com/hail-is/hail/blob/555e02d6c792263db2c3ed97db8002b489e2dacb/hail/python/hail/methods/statgen.py#L2595
            # for the time being, until efficient BlockMatrix filtering gets an easier interface
            windows_jir = Env.backend()._to_java_ir(windows._ir)
            r2_adj = BlockMatrix._from_java(
                r2_adj._jbm.filterRowIntervalsIR(windows_jir, False))

            l2 = r2_adj.sum(axis=0).T + r2_adj.sum(axis=1) + 1

            # Round-trip through disk: block matrix -> TSV -> table.
            l2_bm_tmp = new_temp_file()
            l2_tsv_tmp = new_temp_file()
            l2.write(l2_bm_tmp, force_row_major=True)
            BlockMatrix.export(l2_bm_tmp, l2_tsv_tmp)

            scores = hl.import_table(l2_tsv_tmp, no_header=True, impute=True)
            scores = scores.add_index().rename({'f0': 'ld_score'}).key_by('idx')

            ht = ht.annotate(**scores[ht.new_idx]).select_globals()
            ht.filter(hl.is_defined(ht.ld_score)).write(
                ld_scores_path(data_type, pop, adj), overwrite)

示例#20

0

显示文件

文件： methods.py 项目： chrisvittal/hail

def write_variant_datasets(vdss,
                           paths,
                           *,
                           overwrite=False,
                           stage_locally=False,
                           codec_spec=None):
    """Write many `vdses` to their corresponding path in `paths`.

    Two multi-writes are executed on the backend, in this order: the
    ``reference_data`` of every dataset goes to ``<path>/reference_data``,
    then the ``variant_data`` of every dataset goes to
    ``<path>/variant_data``. ``overwrite``, ``stage_locally`` and
    ``codec_spec`` are forwarded to both writers.
    """
    reference_paths = [f"{p}/reference_data" for p in paths]
    ref_writer = ir.MatrixNativeMultiWriter(
        reference_paths, overwrite, stage_locally, codec_spec)
    variant_paths = [f"{p}/variant_data" for p in paths]
    var_writer = ir.MatrixNativeMultiWriter(
        variant_paths, overwrite, stage_locally, codec_spec)

    reference_mirs = [vds.reference_data._mir for vds in vdss]
    Env.backend().execute(ir.MatrixMultiWrite(reference_mirs, ref_writer))

    variant_mirs = [vds.variant_data._mir for vds in vdss]
    Env.backend().execute(ir.MatrixMultiWrite(variant_mirs, var_writer))

示例#21

0

显示文件

文件： context.py 项目： saponas/hail

def debug_info():
    """Collect basic diagnostic information about the running installation.

    Returns a dict with three keys: ``'spark_conf'`` (all Spark configuration
    entries when the backend is a ``SparkBackend``, otherwise ``None``),
    ``'hail_jar_path'`` (filename of the bundled ``hail-all-spark.jar``
    resource when it exists, otherwise ``None``) and ``'version'``.
    """
    from hail.backend.spark_backend import SparkBackend

    jar_resource = "hail-all-spark.jar"
    if pkg_resources.resource_exists(__name__, jar_resource):
        hail_jar_path = pkg_resources.resource_filename(__name__, jar_resource)
    else:
        hail_jar_path = None

    # Spark configuration is only available when running on the Spark backend.
    if isinstance(Env.backend(), SparkBackend):
        spark_conf = spark_context()._conf.getAll()
    else:
        spark_conf = None

    return {
        'spark_conf': spark_conf,
        'hail_jar_path': hail_jar_path,
        'version': version(),
    }

示例#22

0

显示文件

文件： expression_utils.py 项目： hail-ci-test/ci-test-x0zfxmt0o313

def _eval_many(*expressions, timed=False, name='_eval_many'):
    """Evaluate several expressions with a single backend call.

    Each expression is analyzed and turned into an IR: source-less
    expressions contribute their own IR (after a type sanity check), while
    expressions tied to a source are stored under a unique global field of
    that source and read back. All IRs are then handed to the backend's
    ``execute_many`` together with ``timed``.

    Raises ``ExpressionException`` when a source-less expression's ``dtype``
    disagrees with the type of its IR.
    """
    from hail.utils.java import Env

    irs = []
    for expression in expressions:
        analyze(name, expression, Indices(expression._indices.source))

        source = expression._indices.source
        if source is not None:
            uid = Env.get_uid()
            selected = source.select_globals(**{uid: expression})
            irs.append(selected.index_globals()[uid]._ir)
            continue

        ir_type = expression._ir.typ
        expression_type = expression.dtype
        if ir_type != expression_type:
            raise ExpressionException(
                f'Expression type and IR type differed: \n{ir_type}\n vs \n{expression_type}'
            )
        irs.append(expression._ir)

    return Env.backend().execute_many(*irs, timed=timed)

示例#23

0

显示文件

文件： blockmatrix_ir.py 项目： jigold/hail

 def _compute_type(self):
     # The type of this block-matrix IR node is obtained from the backend
     # and stored on ``_type``.
     backend = Env.backend()
     self._type = backend.blockmatrix_type(self)

示例#24

0

显示文件

文件： table_ir.py 项目： jigold/hail

 def _compute_type(self):
     # The type of this table IR node is obtained from the backend and
     # stored on ``_type``.
     backend = Env.backend()
     self._type = backend.table_type(self)