示例#1
0
def get_select_exprs(caller, exprs, named_exprs, indices, protect_keys=True):
    """Validate select-style arguments and collect them as ordered assignments.

    Parameters
    ----------
    caller : str
        Name of the calling method; interpolated into error messages.
    exprs : iterable
        Positional selections. A ``str`` is resolved with
        ``indices.source[e]``; anything else is passed through ``to_expr``.
    named_exprs : dict
        Keyword selections; every value is passed through ``to_expr``.
    indices
        Index object the positional expressions must match. Its ``source``
        and ``axes`` attributes are read here.
    protect_keys : bool
        When true, every resulting field name is passed to ``check_keys``.

    Returns
    -------
    OrderedDict
        Field name -> expression, positional selections first, then the
        keyword selections.

    Raises
    ------
    ExpressionException
        If a positional expression has different indices than `indices`, or
        is not a nested field reference. The helper checks called here may
        raise as well.
    """
    # Only the names actually used below are imported.
    from hail.expr.expressions import to_expr, ExpressionException
    exprs = [
        to_expr(e) if not isinstance(e, str) else indices.source[e]
        for e in exprs
    ]
    named_exprs = {k: to_expr(v) for k, v in named_exprs.items()}
    assignments = OrderedDict()

    for e in exprs:
        if not e._indices == indices:
            raise ExpressionException(
                "method '{}' parameter 'exprs' expects {}-indexed fields,"
                " found indices {}".format(caller, list(indices.axes),
                                           list(e._indices.axes)))
        # A positional argument must carry its own name, so only field
        # references are accepted; anything else needs a keyword.
        if not e._ast.is_nested_field:
            raise ExpressionException(
                "method '{}' expects keyword arguments for complex expressions"
                .format(caller))
        if protect_keys:
            check_keys(e._ast.name, indices)
        assignments[e._ast.name] = e
    for k, e in named_exprs.items():
        if protect_keys:
            check_keys(k, indices)
        check_collisions(indices.source._fields, k, indices)
        assignments[k] = e
    check_field_uniqueness(assignments.keys())
    return assignments
示例#2
0
def check_collisions(caller, names, indices, override_protected_indices=None):
    """Raise if a name clashes with a protected field or is listed twice.

    Parameters
    ----------
    caller : str
        Name of the calling method; prefixed to error messages.
    names : iterable of str
        Candidate field names.
    indices
        Index object; ``indices.source._fields`` is the mapping of existing
        fields that names are checked against.
    override_protected_indices : container, optional
        When given, an existing field is a collision if its ``_indices`` is
        contained in this collection. When ``None``, an existing field is a
        collision if its ``_indices`` differs from `indices`.

    Raises
    ------
    ExpressionException
        On the first colliding name (also logged through ``error``), or on
        the first name that occurs more than once in `names`.
    """
    from hail.expr.expressions import ExpressionException
    fields = indices.source._fields

    if override_protected_indices is not None:

        def invalid(e):
            return e._indices in override_protected_indices
    else:

        def invalid(e):
            return e._indices != indices

    # check collisions with fields on other axes
    for name in names:
        if name in fields and invalid(fields[name]):
            msg = f"{caller!r}: name collision with field indexed by {list(fields[name]._indices.axes)}: {name!r}"
            error('Analysis exception: {}'.format(msg))
            raise ExpressionException(msg)

    # check duplicate fields
    for k, v in Counter(names).items():
        if v > 1:
            raise ExpressionException(
                f"{caller!r}: selection would produce duplicate field {k!r}")
示例#3
0
def check_collisions(fields, name, indices):
    """Reject `name` if it already names a field with different indices.

    Parameters
    ----------
    fields : mapping
        Existing fields by name; each value exposes ``_indices``.
    name : str
        Candidate field name.
    indices
        Indices the new field will have.

    Raises
    ------
    ExpressionException
        If `name` is in `fields` and that field's ``_indices`` does not
        compare equal to `indices`. The message is also logged via ``error``.
    """
    from hail.expr.expressions import ExpressionException
    if name not in fields:
        return
    existing = fields[name]
    if existing._indices == indices:
        # Same indices: nothing to report.
        return
    msg = 'name collision with field indexed by {}: {}'.format(
        list(existing._indices.axes), name)
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
示例#4
0
文件: misc.py 项目: troels/hail
def get_key_by_exprs(caller, exprs, named_exprs, indices, override_protected_indices=None):
    """Validate ``key_by``-style arguments and work out the new key.

    Parameters
    ----------
    caller : str
        Name of the calling method; used in error messages and passed to
        ``analyze`` and ``check_collisions``.
    exprs : iterable
        Positional key expressions. A ``str`` is resolved with
        ``indices.source[e]``; other values are used as given.
    named_exprs : dict
        Keyword key expressions; every value is passed through ``to_expr``.
    indices
        Index object the expressions are analyzed against.
    override_protected_indices : container, optional
        Forwarded unchanged to ``check_collisions``.

    Returns
    -------
    tuple
        ``(final_key, bindings)``: the list of key field names (positional
        first, then keyword names) and a dict of name -> expression for every
        key field that is not already in ``indices.source._fields_inverse``,
        plus all keyword expressions.

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference. The
        ``analyze`` and ``check_collisions`` calls may raise as well.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze
    exprs = [indices.source[e] if isinstance(e, str) else e for e in exprs]
    named_exprs = {k: to_expr(v) for k, v in named_exprs.items()}

    bindings = []

    def is_top_level_field(e):
        return e in indices.source._fields_inverse

    final_key = []
    for e in exprs:
        analyze(caller, e, indices, broadcast=False)
        if not e._ir.is_nested_field:
            raise ExpressionException(f"{caller!r} expects keyword arguments for complex expressions\n"
                                      f"  Correct:   ht = ht.key_by('x')\n"
                                      f"  Correct:   ht = ht.key_by(ht.x)\n"
                                      f"  Correct:   ht = ht.key_by(x = ht.x.replace(' ', '_'))\n"
                                      f"  INCORRECT: ht = ht.key_by(ht.x.replace(' ', '_'))")

        name = e._ir.name
        final_key.append(name)

        # Only expressions that are not already known to the source need a
        # binding.
        if not is_top_level_field(e):
            bindings.append((name, e))

    # Extending a list with a dict appends its keys.
    final_key.extend(named_exprs)
    bindings.extend(named_exprs.items())
    check_collisions(caller, final_key, indices, override_protected_indices=override_protected_indices)
    return final_key, dict(bindings)
示例#5
0
文件: misc.py 项目: rcownie/hail
def get_select_exprs(caller, exprs, named_exprs, indices, protect_keys=True):
    """Validate select-style arguments and collect them as ordered assignments.

    Parameters
    ----------
    caller : str
        Name of the calling method; used in error messages and by ``analyze``.
    exprs : iterable
        Positional selections. A ``str`` is resolved with
        ``indices.source[e]``; anything else goes through ``to_expr``.
    named_exprs : dict
        Keyword selections; every value goes through ``to_expr``.
    indices
        Index object the positional expressions are analyzed against.
    protect_keys : bool
        When true, every resulting field name is passed to ``check_keys``.

    Returns
    -------
    OrderedDict
        Field name -> expression, positional selections first.

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference. The
        helper checks called here may raise as well.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze

    positional = []
    for item in exprs:
        if isinstance(item, str):
            positional.append(indices.source[item])
        else:
            positional.append(to_expr(item))

    keyword = {}
    for field_name, value in named_exprs.items():
        keyword[field_name] = to_expr(value)

    selected = OrderedDict()

    for expr in positional:
        # Positional arguments must be plain field references so that they
        # carry their own name.
        if not expr._ir.is_nested_field:
            raise ExpressionException(
                "method '{}' expects keyword arguments for complex expressions"
                .format(caller))
        analyze(caller, expr, indices, broadcast=False)
        field_name = expr._ir.name
        if protect_keys:
            check_keys(field_name, indices)
        selected[field_name] = expr

    existing_fields = indices.source._fields if keyword else None
    for field_name, expr in keyword.items():
        if protect_keys:
            check_keys(field_name, indices)
        check_collisions(existing_fields, field_name, indices)
        selected[field_name] = expr

    check_field_uniqueness(selected.keys())
    return selected
示例#6
0
def check_keys(caller, name, protected_key):
    """Refuse to overwrite a protected key field.

    Parameters
    ----------
    caller : str
        Name of the calling method; prefixed to the error message.
    name : str
        Field name about to be written.
    protected_key : container
        Names that must not be overwritten.

    Raises
    ------
    ExpressionException
        If `name` is in `protected_key`. The message is also logged via
        ``error``.
    """
    from hail.expr.expressions import ExpressionException
    if name not in protected_key:
        return
    msg = f"{caller!r}: cannot overwrite key field {name!r} with annotate, select or drop; " \
          f"use key_by to modify keys."
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
示例#7
0
def check_keys(name, indices):
    """Refuse to overwrite a key field of `indices`.

    Parameters
    ----------
    name : str
        Field name about to be written.
    indices
        Object whose ``key`` attribute is either ``None`` (nothing is
        protected) or an iterable of key field names.

    Raises
    ------
    ExpressionException
        If `name` is one of the key fields. The message is also logged via
        ``error``.
    """
    from hail.expr.expressions import ExpressionException
    key = indices.key
    if key is None or name not in set(key):
        return
    msg = "cannot overwrite key field {} with annotate, select or drop; use key_by to modify keys.".format(repr(name))
    error('Analysis exception: {}'.format(msg))
    raise ExpressionException(msg)
示例#8
0
 def coerce(self, x) -> Expression:
     """Convert `x` to an expression and coerce it to this coercer's type.

     `x` is first passed through ``to_expr``. If ``can_coerce`` rejects its
     dtype an :class:`ExpressionException` is raised. Otherwise the
     expression is returned as is, or routed through ``_coerce`` when
     ``_requires_conversion`` says a conversion is needed.
     """
     expr = to_expr(x)
     source_type = expr.dtype
     if not self.can_coerce(source_type):
         raise ExpressionException(f"cannot coerce type '{source_type}' to type '{self.str_t}'")
     if not self._requires_conversion(source_type):
         return expr
     return self._coerce(expr)
示例#9
0
def get_annotate_exprs(caller, named_exprs, indices):
    """Convert and validate the keyword arguments of an annotate-style call.

    Parameters
    ----------
    caller : str
        Name of the calling method; used in the key-overwrite message.
    named_exprs : dict
        Field name -> value; every value is passed through ``to_expr``.
    indices
        Index object; its ``key`` and ``source._fields`` are consulted.

    Returns
    -------
    dict
        Field name -> converted expression.

    Raises
    ------
    ExpressionException
        If a name is a key of ``indices.key``. ``check_keys`` and
        ``check_collisions`` may raise as well.
    """
    from hail.expr.expressions import to_expr, ExpressionException

    # All values are converted before any name is validated.
    converted = {}
    for field_name, value in named_exprs.items():
        converted[field_name] = to_expr(value)

    for field_name in converted:
        check_keys(field_name, indices)
        if indices.key and field_name in indices.key.keys():
            raise ExpressionException("'{}' cannot overwrite key field: {}"
                                      .format(caller, repr(field_name)))
        check_collisions(indices.source._fields, field_name, indices)
    return converted
示例#10
0
def get_select_exprs(caller, exprs, named_exprs, indices, base_struct):
    """Build the struct produced by a select-style call.

    Parameters
    ----------
    caller : str
        Name of the calling method; used in error messages and passed to
        ``analyze``, ``check_keys`` and ``check_collisions``.
    exprs : iterable
        Positional selections. A ``str`` is resolved with
        ``indices.source[e]``; other values are used as given.
    named_exprs : dict
        Keyword selections; every value is passed through ``to_expr``.
    indices
        Index object; its ``protected_key`` fields are always kept and come
        first in the result.
    base_struct
        Struct expression that the selection is applied to.

    Returns
    -------
    The struct built from `base_struct` whose fields, in order, are the
    protected key, the positional selections and the keyword selections.

    Raises
    ------
    ExpressionException
        If a positional expression is not a nested field reference. The
        helper checks called here may raise as well.
    """
    from hail.expr.expressions import to_expr, ExpressionException, analyze

    resolved = [indices.source[e] if isinstance(e, str) else e for e in exprs]
    keyword = {k: to_expr(v) for k, v in named_exprs.items()}

    # Fields taken from the existing struct; starts as a copy of the
    # protected key.
    kept = indices.protected_key[:]
    protected_key = set(kept)
    # Fields that have to be inserted because they are computed or renamed.
    added = {}
    # Field order of the final struct.
    output_order = kept[:]

    for expr in resolved:
        if not expr._ir.is_nested_field:
            raise ExpressionException(
                f"{caller!r} expects keyword arguments for complex expressions\n"
                f"  Correct:   ht = ht.select('x')\n"
                f"  Correct:   ht = ht.select(ht.x)\n"
                f"  Correct:   ht = ht.select(x = ht.x.replace(' ', '_'))\n"
                f"  INCORRECT: ht = ht.select(ht.x.replace(' ', '_'))")
        analyze(caller, expr, indices, broadcast=False)

        field_name = expr._ir.name
        check_keys(caller, field_name, protected_key)
        output_order.append(field_name)
        if expr in indices.source._fields_inverse:
            kept.append(field_name)
        else:
            added[field_name] = expr

    for field_name, expr in keyword.items():
        check_keys(caller, field_name, protected_key)
        output_order.append(field_name)
        added[field_name] = expr

    check_collisions(caller, output_order, indices)

    already_in_order = output_order == kept + list(added)
    narrowed = base_struct.select(*kept)
    if already_in_order:
        # don't clog the IR with redundant field names
        s = narrowed.annotate(**added)
    else:
        s = narrowed._annotate_ordered(added, output_order)

    assert list(s) == output_order
    return s
示例#11
0
    def when_missing(self, then) -> 'SwitchBuilder':
        """Register the result to use when the `base` expression is missing.

        Parameters
        ----------
        then : :class:`.Expression`
            Value to return for a missing `base`.

        Returns
        -------
        :class:`.SwitchBuilder`
            Mutates and returns `self`.

        Raises
        ------
        ExpressionException
            If a missing case has already been registered.
        """
        already_registered = self._when_missing_case is not None
        if already_registered:
            raise ExpressionException("'when_missing' can only be called once")

        # The type is unified before the case is stored.
        self._unify_type(then.dtype)
        self._when_missing_case = then
        return self
示例#12
0
文件: builders.py 项目: shulik7/hail
    def or_missing(self):
        """Finish the case statement by returning missing.

        Notes
        -----
        If no condition from a :meth:`.CaseBuilder.when` call is ``True``, then
        the result is missing.

        Returns
        -------
        :class:`.Expression`

        Raises
        ------
        ExpressionException
            If no case has been added yet.
        """
        no_cases = len(self._cases) == 0
        if no_cases:
            raise ExpressionException("'or_missing' cannot be called without at least one 'when' call")
        from hail.expr.functions import null
        fallback = null(self._ret_type)
        return self._finish(fallback)
示例#13
0
    def write_from_entry_expr(entry_expr, path, block_size=None):
        """Writes a block matrix from a matrix table entry expression.

        Notes
        -----
        The resulting file can be loaded with :meth:`BlockMatrix.read`.

        If `entry_expr` is already a field of its source matrix table, that
        field is written directly; otherwise the expression is first selected
        into a temporary entry field named by ``Env.get_uid()``.

        Parameters
        ----------
        entry_expr: :class:`.Float64Expression`
            Entry expression for numeric matrix entries.
        path: :obj:`str`
            Path for output.
        block_size: :obj:`int`, optional
            Block size. Any falsy value (``None`` or ``0``) selects the
            default given by :meth:`.BlockMatrix.default_block_size`.

        Raises
        ------
        ValueError
            If the source of `entry_expr` is not a ``MatrixTable``.
        ExpressionException
            If `entry_expr` is not indexed by the source's entry indices.
        """
        if not block_size:
            block_size = BlockMatrix.default_block_size()

        source = entry_expr._indices.source
        if not isinstance(source, MatrixTable):
            raise ValueError(
                "Expect an expression of 'MatrixTable', found {}".format(
                    "expression of '{}'".format(source.__class__)
                    if source is not None else 'scalar expression'))

        if entry_expr._indices != source._entry_indices:
            from hail.expr.expressions import ExpressionException
            # Name this method in the message so the user can see which call
            # rejected the expression.
            raise ExpressionException(
                "write_from_entry_expr: 'entry_expr' must be entry-indexed,"
                " found indices {}".format(list(entry_expr._indices.axes)))

        if entry_expr in source._fields_inverse:
            source._jvds.writeBlockMatrix(path,
                                          source._fields_inverse[entry_expr],
                                          block_size)
        else:
            uid = Env.get_uid()
            source.select_entries(**{
                uid: entry_expr
            })._jvds.writeBlockMatrix(path, uid, block_size)
示例#14
0
    def when_missing(self, then) -> 'SwitchBuilder':
        """Add a branch that returns `then` when the `base` expression is missing.

        Parameters
        ----------
        then : :class:`.Expression`
            Value to return for a missing `base`.

        Returns
        -------
        :class:`.SwitchBuilder`
            Mutates and returns `self`.

        Raises
        ------
        ExpressionException
            If ``self._has_missing_branch`` is already true.
        """
        # NOTE(review): this method reads `_has_missing_branch` but does not
        # set it; confirm it is updated elsewhere, otherwise this guard
        # cannot fire on a repeated call.
        if self._has_missing_branch:
            raise ExpressionException("'when_missing' can only be called once")
        self._unify_type(then.dtype)

        from hail.expr.functions import is_missing
        missing_case = (is_missing(self._base), then)
        # need to insert at 0, because upstream missingness would propagate
        self._cases.insert(0, missing_case)
        return self
示例#15
0
    def or_error(self, message):
        """Finish the case statement by throwing an error with the given message.

        Notes
        -----
        If no condition from a :meth:`.CaseBuilder.when` call is ``True``, then
        an error is thrown.

        Parameters
        ----------
        message : :class:`.Expression` of type :obj:`.tstr`
            Message of the error; its ``_ir`` is embedded in the error node.

        Returns
        -------
        :class:`.Expression`

        Raises
        ------
        ExpressionException
            If no case has been added yet.
        """
        no_cases = len(self._cases) == 0
        if no_cases:
            raise ExpressionException(
                "'or_error' cannot be called without at least one 'when' call")
        result_type = self._ret_type
        die_node = ir.Die(message._ir, result_type)
        return self._finish(construct_expr(die_node, self._ret_type))
示例#16
0
    def or_missing(self):
        """Finish the switch statement by returning missing.

        Notes
        -----
        If no value from a :meth:`~.SwitchBuilder.when` call is matched, then
        the result is missing.

        Returns
        -------
        :class:`.Expression`

        Raises
        ------
        ExpressionException
            If no case has been added yet.
        """
        no_cases = len(self._cases) == 0
        if no_cases:
            raise ExpressionException(
                "'or_missing' cannot be called without at least one 'when' call"
            )
        from hail.expr.functions import missing
        fallback = missing(self._ret_type)
        return self._finish(fallback)
示例#17
0
def check_field_uniqueness(fields):
    """Raise if any name occurs more than once in `fields`.

    Parameters
    ----------
    fields : iterable
        Hashable field names.

    Raises
    ------
    ExpressionException
        For the first name, in first-seen order, that is counted more than
        once.
    """
    for name, occurrences in Counter(fields).items():
        if occurrences <= 1:
            continue
        # Imported only on the failure path.
        from hail.expr.expressions import ExpressionException
        raise ExpressionException("selection would produce duplicate field '{}'".format(repr(name)))