def gen_table_expr(table, expr): resolver = { name: FakeSeries(dtype.to_pandas_dtype()) for name, dtype in zip(table.schema.names, table.schema.types) } scope = Scope(level=0, resolvers=(resolver, )) return Expr(expr=expr, env=scope)
def Expr(*args, **kwargs): import warnings warnings.warn("pd.Expr is deprecated as it is not " "applicable to user code", FutureWarning, stacklevel=2) from pandas.core.computation.expr import Expr return Expr(*args, **kwargs)
def inject_df(s): """ convert expression string like (a > 1) to (df["a"] > 1) so that it can be eval'd later """ expr = Expr(s, env=env, parser="pandas") self = expr._visitor def visit_Name_hack(node, **kwargs): result = self.term_type(node.id, self.env, **kwargs) result._name = f'df["{result._name}"]' return result def _maybe_downcast_constants_hack(left, right): return left, right expr._visitor.visit_Name = visit_Name_hack expr._visitor._maybe_downcast_constants = _maybe_downcast_constants_hack expr.terms = expr.parse() return str(expr)
def gen_table_expr(table, expr): """ Build pandas expression for the specified query. Parameters ---------- table : pyarrow.Table Table to evaluate expression on. expr : str Query string to evaluate on the `table` columns. Returns ------- pandas.core.computation.expr.Expr """ resolver = { name: FakeSeries(dtype.to_pandas_dtype()) for name, dtype in zip(table.schema.names, table.schema.types) } scope = Scope(level=0, resolvers=(resolver, )) return Expr(expr=expr, env=scope)
def eval(expr, parser='pandas', engine=None, truediv=True, local_dict=None, global_dict=None, resolvers=(), level=0, target=None, inplace=False): """Evaluate a Python expression as a string using various backends. The following arithmetic operations are supported: ``+``, ``-``, ``*``, ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, :keyword:`or`, and :keyword:`not` with the same semantics as the corresponding bitwise operators. :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects are supported and behave as they would with plain ol' Python evaluation. Parameters ---------- expr : str or unicode The expression to evaluate. This string cannot contain any Python `statements <https://docs.python.org/3/reference/simple_stmts.html#simple-statements>`__, only Python `expressions <https://docs.python.org/3/reference/simple_stmts.html#expression-statements>`__. parser : string, default 'pandas', {'pandas', 'python'} The parser to use to construct the syntax tree from the expression. The default of ``'pandas'`` parses code slightly different than standard Python. Alternatively, you can parse an expression using the ``'python'`` parser to retain strict Python semantics. See the :ref:`enhancing performance <enhancingperf.eval>` documentation for more details. engine : string or None, default 'numexpr', {'python', 'numexpr'} The engine used to evaluate the expression. Supported engines are - None : tries to use ``numexpr``, falls back to ``python`` - ``'numexpr'``: This default engine evaluates pandas objects using numexpr for large speed ups in complex expressions with large frames. - ``'python'``: Performs operations as if you had ``eval``'d in top level python. This engine is generally not that useful. More backends may be available in the future. truediv : bool, optional Whether to use true division, like in Python >= 3 local_dict : dict or None, optional A dictionary of local variables, taken from locals() by default. global_dict : dict or None, optional A dictionary of global variables, taken from globals() by default. resolvers : list of dict-like or None, optional A list of objects implementing the ``__getitem__`` special method that you can use to inject an additional collection of namespaces to use for variable lookup. For example, this is used in the :meth:`~pandas.DataFrame.query` method to inject the ``DataFrame.index`` and ``DataFrame.columns`` variables that refer to their respective :class:`~pandas.DataFrame` instance attributes. level : int, optional The number of prior stack frames to traverse and add to the current scope. Most users will **not** need to change this parameter. target : object, optional, default None This is the target object for assignment. It is used when there is variable assignment in the expression. If so, then `target` must support item assignment with string keys, and if a copy is being returned, it must also support `.copy()`. inplace : bool, default False If `target` is provided, and the expression mutates `target`, whether to modify `target` inplace. Otherwise, return a copy of `target` with the mutation. Returns ------- ndarray, numeric scalar, DataFrame, Series Raises ------ ValueError There are many instances where such an error can be raised: - `target=None`, but the expression is multiline. - The expression is multiline, but not all them have item assignment. An example of such an arrangement is this: a = b + 1 a + 2 Here, there are expressions on different lines, making it multiline, but the last line has no variable assigned to the output of `a + 2`. - `inplace=True`, but the expression is missing item assignment. - Item assignment is provided, but the `target` does not support string item assignment. - Item assignment is provided and `inplace=False`, but the `target` does not support the `.copy()` method Notes ----- The ``dtype`` of any objects involved in an arithmetic ``%`` operation are recursively cast to ``float64``. See the :ref:`enhancing performance <enhancingperf.eval>` documentation for more details. See Also -------- pandas.DataFrame.query pandas.DataFrame.eval """ from pandas.core.computation.expr import Expr inplace = validate_bool_kwarg(inplace, "inplace") if isinstance(expr, string_types): _check_expression(expr) exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] else: exprs = [expr] multi_line = len(exprs) > 1 if multi_line and target is None: raise ValueError("multi-line expressions are only valid in the " "context of data, use DataFrame.eval") ret = None first_expr = True target_modified = False for expr in exprs: expr = _convert_expression(expr) engine = _check_engine(engine) _check_parser(parser) _check_resolvers(resolvers) _check_for_locals(expr, level, parser) # get our (possibly passed-in) scope env = _ensure_scope(level + 1, global_dict=global_dict, local_dict=local_dict, resolvers=resolvers, target=target) parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, truediv=truediv) # construct the engine and evaluate the parsed expression eng = _engines[engine] eng_inst = eng(parsed_expr) ret = eng_inst.evaluate() if parsed_expr.assigner is None: if multi_line: raise ValueError("Multi-line expressions are only valid" " if all expressions contain an assignment") elif inplace: raise ValueError("Cannot operate inplace " "if there is no assignment") # assign if needed assigner = parsed_expr.assigner if env.target is not None and assigner is not None: target_modified = True # if returning a copy, copy only on the first assignment if not inplace and first_expr: try: target = env.target.copy() except AttributeError: raise ValueError("Cannot return a copy of the target") else: target = env.target # TypeError is most commonly raised (e.g. int, list), but you # get IndexError if you try to do this assignment on np.ndarray. # we will ignore numpy warnings here; e.g. if trying # to use a non-numeric indexer try: with warnings.catch_warnings(record=True): target[assigner] = ret except (TypeError, IndexError): raise ValueError("Cannot assign expression output to target") if not resolvers: resolvers = ({assigner: ret}, ) else: # existing resolver needs updated to handle # case of mutating existing column in copy for resolver in resolvers: if assigner in resolver: resolver[assigner] = ret break else: resolvers += ({assigner: ret}, ) ret = None first_expr = False # We want to exclude `inplace=None` as being False. if inplace is False: return target if target_modified else ret
def eval(expr, parser='pandas', engine=None, truediv=True, local_dict=None, global_dict=None, resolvers=(), level=0, target=None, inplace=None): """Evaluate a Python expression as a string using various backends. The following arithmetic operations are supported: ``+``, ``-``, ``*``, ``/``, ``**``, ``%``, ``//`` (python engine only) along with the following boolean operations: ``|`` (or), ``&`` (and), and ``~`` (not). Additionally, the ``'pandas'`` parser allows the use of :keyword:`and`, :keyword:`or`, and :keyword:`not` with the same semantics as the corresponding bitwise operators. :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects are supported and behave as they would with plain ol' Python evaluation. Parameters ---------- expr : str or unicode The expression to evaluate. This string cannot contain any Python `statements <http://docs.python.org/2/reference/simple_stmts.html#simple-statements>`__, only Python `expressions <http://docs.python.org/2/reference/simple_stmts.html#expression-statements>`__. parser : string, default 'pandas', {'pandas', 'python'} The parser to use to construct the syntax tree from the expression. The default of ``'pandas'`` parses code slightly different than standard Python. Alternatively, you can parse an expression using the ``'python'`` parser to retain strict Python semantics. See the :ref:`enhancing performance <enhancingperf.eval>` documentation for more details. engine : string or None, default 'numexpr', {'python', 'numexpr'} The engine used to evaluate the expression. Supported engines are - None : tries to use ``numexpr``, falls back to ``python`` - ``'numexpr'``: This default engine evaluates pandas objects using numexpr for large speed ups in complex expressions with large frames. - ``'python'``: Performs operations as if you had ``eval``'d in top level python. This engine is generally not that useful. More backends may be available in the future. truediv : bool, optional Whether to use true division, like in Python >= 3 local_dict : dict or None, optional A dictionary of local variables, taken from locals() by default. global_dict : dict or None, optional A dictionary of global variables, taken from globals() by default. resolvers : list of dict-like or None, optional A list of objects implementing the ``__getitem__`` special method that you can use to inject an additional collection of namespaces to use for variable lookup. For example, this is used in the :meth:`~pandas.DataFrame.query` method to inject the :attr:`~pandas.DataFrame.index` and :attr:`~pandas.DataFrame.columns` variables that refer to their respective :class:`~pandas.DataFrame` instance attributes. level : int, optional The number of prior stack frames to traverse and add to the current scope. Most users will **not** need to change this parameter. target : a target object for assignment, optional, default is None essentially this is a passed in resolver inplace : bool, default True If expression mutates, whether to modify object inplace or return copy with mutation. WARNING: inplace=None currently falls back to to True, but in a future version, will default to False. Use inplace=True explicitly rather than relying on the default. Returns ------- ndarray, numeric scalar, DataFrame, Series Notes ----- The ``dtype`` of any objects involved in an arithmetic ``%`` operation are recursively cast to ``float64``. See the :ref:`enhancing performance <enhancingperf.eval>` documentation for more details. See Also -------- pandas.DataFrame.query pandas.DataFrame.eval """ inplace = validate_bool_kwarg(inplace, 'inplace') first_expr = True if isinstance(expr, string_types): _check_expression(expr) exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] else: exprs = [expr] multi_line = len(exprs) > 1 if multi_line and target is None: raise ValueError("multi-line expressions are only valid in the " "context of data, use DataFrame.eval") first_expr = True for expr in exprs: expr = _convert_expression(expr) engine = _check_engine(engine) _check_parser(parser) _check_resolvers(resolvers) _check_for_locals(expr, level, parser) # get our (possibly passed-in) scope env = _ensure_scope(level + 1, global_dict=global_dict, local_dict=local_dict, resolvers=resolvers, target=target) parsed_expr = Expr(expr, engine=engine, parser=parser, env=env, truediv=truediv) # construct the engine and evaluate the parsed expression eng = _engines[engine] eng_inst = eng(parsed_expr) ret = eng_inst.evaluate() if parsed_expr.assigner is None and multi_line: raise ValueError("Multi-line expressions are only valid" " if all expressions contain an assignment") # assign if needed if env.target is not None and parsed_expr.assigner is not None: if inplace is None: warnings.warn( "eval expressions containing an assignment currently" "default to operating inplace.\nThis will change in " "a future version of pandas, use inplace=True to " "avoid this warning.", FutureWarning, stacklevel=3) inplace = True # if returning a copy, copy only on the first assignment if not inplace and first_expr: target = env.target.copy() else: target = env.target target[parsed_expr.assigner] = ret if not resolvers: resolvers = ({parsed_expr.assigner: ret},) else: # existing resolver needs updated to handle # case of mutating existing column in copy for resolver in resolvers: if parsed_expr.assigner in resolver: resolver[parsed_expr.assigner] = ret break else: resolvers += ({parsed_expr.assigner: ret},) ret = None first_expr = False if not inplace and inplace is not None: return target return ret
def _resolve_query( query: str, context: pd.DataFrame = None, resolvers: tuple = None, engine: str = None, parser: str = "pandas", ): """Resolve query in the given context.""" from pandas.core.computation.expr import Expr from pandas.core.computation.eval import _ensure_scope if not query: return context q = query q = re.sub(r"\[\(", "", q) q = re.sub(r"\b(\d)+\b", "", q) q = re.sub(r"[+\-\*:!<>=~.|&%]", " ", q) # get our (possibly passed-in) scope resolvers = resolvers or () if isinstance(context, pd.DataFrame): index_resolvers = context._get_index_resolvers() resolvers = tuple(resolvers) + (dict(context.iteritems()), index_resolvers) repl = [] for idx, resolver in enumerate(resolvers): keys = resolver.keys() for op in set(q.split()): matches = [(op, k) for k in keys if re.search(op, k)] if len(matches) == 1: op, key = matches[0] repl.append((idx, op, resolver[key])) elif len(matches) > 1: raise KeyError(f"Ambiguous query operand provided: `{op}`") for idx, op, val in repl: resolvers[idx][op] = val env = _ensure_scope(level=1, resolvers=resolvers, target=context) expr = Expr(query, engine=engine, parser=parser, env=env) def _resolve_operands(operands) -> list: for op in operands: # complex query if op.is_scalar: continue if hasattr(op, "operands"): yield from _resolve_operands(op.operands) yield str(op) operands = set(_resolve_operands(expr.terms.operands)) for op in operands: try: query = query.replace(op, env.resolvers[op].name) except KeyError: pass return context.query(query)