def elemwise_array(expr, *data, **kwargs):
    """Evaluate an elementwise expression blockwise over ``data`` with ``atop``.

    Every array (the output and each input) is labelled with its own axis
    numbers in reverse order, so the last axis of every operand always
    carries label ``0``.

    Parameters
    ----------
    expr
        The expression to evaluate; its ``_inputs`` are used as the leaves
        handed to ``compute_it``.
    *data
        The arrays to bind to the leaves, in order.
    **kwargs
        Forwarded to ``compute_it`` through ``curry``.

    Returns
    -------
    The result of ``atop``, with the dtype taken from the expression's
    datashape measure.
    """
    inputs = expr._inputs
    out_labels = tuple(reversed(range(ndim(expr))))
    block_func = curry(compute_it, expr, inputs, **kwargs)

    # Flatten into (array, labels, array, labels, ...) as atop expects.
    operands = []
    for arr in data:
        operands.append(arr)
        operands.append(tuple(reversed(range(ndim(arr)))))

    return atop(
        block_func,
        out_labels,
        *operands,
        dtype=expr.dshape.measure.to_numpy_dtype()
    )
def compute_broadcast(expr, *data, **kwargs):
    """Evaluate a broadcast expression blockwise using a numba ufunc.

    The ufunc is obtained from ``get_numba_ufunc(expr)`` and applied with
    ``atop``. The output and every input are labelled with their own axis
    numbers in reverse order, so the last axis of each operand always
    carries label ``0``.

    Parameters
    ----------
    expr
        The broadcast expression to compile and evaluate.
    *data
        The arrays passed to the ufunc, in order.
    **kwargs
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    The result of ``atop``; its name is drawn from the ``names`` iterator
    defined outside this block.
    """
    expr_inds = tuple(range(ndim(expr)))[::-1]
    func = get_numba_ufunc(expr)
    return atop(
        func,
        next(names),
        expr_inds,
        *concat((dat, tuple(range(ndim(dat))[::-1])) for dat in data)
    )
def elemwise_array(expr, *data, **kwargs):
    """Evaluate an elementwise expression blockwise over ``data`` with ``atop``.

    The output and each input are labelled with their own axis numbers in
    reverse order, so the last axis of every operand carries label ``0``.
    No ``dtype`` is passed to ``atop`` in this variant.

    Parameters
    ----------
    expr
        The expression to evaluate; its ``_inputs`` are the leaves handed
        to ``compute_it``.
    *data
        The arrays to bind to the leaves, in order.
    **kwargs
        Forwarded to ``compute_it`` through ``curry``.
    """
    inputs = expr._inputs
    out_labels = tuple(reversed(range(ndim(expr))))
    block_func = curry(compute_it, expr, inputs, **kwargs)

    # Interleave each array with its reversed axis labels.
    array_label_pairs = []
    for arr in data:
        array_label_pairs.extend((arr, tuple(reversed(range(ndim(arr))))))

    return atop(block_func, out_labels, *array_label_pairs)
def compute_broadcast(expr, *data, **kwargs):
    """Evaluate a broadcast expression blockwise using a numba ufunc.

    The ufunc comes from ``get_numba_ufunc(expr)``. The output and every
    input are labelled with their own axis numbers in reverse order. The
    result dtype is taken from the last array in ``data``.

    Parameters
    ----------
    expr
        The broadcast expression to compile and evaluate.
    *data
        The arrays passed to the ufunc, in order. At least one is needed,
        since ``data[-1].dtype`` is read.
    **kwargs
        Accepted for interface compatibility; not used in this function.
    """
    out_labels = tuple(reversed(range(ndim(expr))))
    ufunc = get_numba_ufunc(expr)

    # Build the flat (array, labels, array, labels, ...) argument list.
    operands = []
    for arr in data:
        operands.append(arr)
        operands.append(tuple(reversed(range(ndim(arr)))))

    return atop(ufunc, out_labels, *operands, dtype=data[-1].dtype)
def elemwise_array(expr, *data, **kwargs):
    """Run ``compute_it`` for ``expr`` blockwise over ``data`` via ``atop``.

    Index labels for the output and for each input are that array's axis
    numbers reversed, which pins label ``0`` to the trailing axis of every
    operand. The output dtype is derived from the expression's datashape
    measure.

    Parameters
    ----------
    expr
        Expression to evaluate; ``expr._inputs`` supplies the leaves.
    *data
        Arrays bound to the leaves, in order.
    **kwargs
        Passed along to ``compute_it`` through ``curry``.
    """
    def reversed_axes(obj):
        # Axis labels counted from the trailing axis.
        return tuple(reversed(range(ndim(obj))))

    inputs = expr._inputs
    out_labels = reversed_axes(expr)
    block_func = curry(compute_it, expr, inputs, **kwargs)

    operands = []
    for arr in data:
        operands += [arr, reversed_axes(arr)]

    return atop(
        block_func,
        out_labels,
        *operands,
        dtype=expr.dshape.measure.to_numpy_dtype()
    )
def compute_up(expr, data, **kwargs):
    """Compute ``expr`` over chunked ``data`` in two ``atop`` passes.

    ``split`` breaks the expression into a per-chunk part and an aggregate
    part. The per-chunk part is applied to ``data`` first. The aggregate
    part is then applied to the intermediate results after they have been
    passed through ``_concatenate2`` along ``expr.axis``.

    Parameters
    ----------
    expr
        The expression to compute; must expose ``_child``, ``axis``,
        ``dshape`` and ``_leaves()``.
    data
        The chunked array to compute over (its ``chunks`` attribute is
        read to build the chunk symbol's shape).
    **kwargs
        Forwarded to both ``compute_it`` calls.

    Returns
    -------
    The ``atop`` result whose index labels are the leaf's axes minus those
    listed in ``expr.axis``.
    """
    leaf = expr._leaves()[0]

    # Shape of the symbolic chunk: first chunk size along each axis,
    # followed by the leaf's measure.
    leading_chunk_sizes = tuple(map(first, data.chunks))
    chunk_shape = leading_chunk_sizes + (leaf.dshape.measure,)
    chunk = symbol('chunk', DataShape(*chunk_shape))

    chunk_pair, agg_pair = split(expr._child, expr, chunk=chunk)
    chunk, chunk_expr = chunk_pair
    agg, agg_expr = agg_pair

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()

    # Pass 1: evaluate the per-chunk expression on every block.
    per_chunk = curry(compute_it, chunk_expr, [chunk], **kwargs)
    tmp = atop(per_chunk, inds, data, inds, dtype=dtype)

    # Pass 2: concatenate along the reduced axes, then aggregate.
    aggregate = compose(
        curry(compute_it, agg_expr, [agg], **kwargs),
        curry(_concatenate2, axes=expr.axis),
    )
    kept_inds = tuple(i for i in inds if i not in expr.axis)
    return atop(aggregate, kept_inds, tmp, inds, dtype=dtype)
def compute_up(expr, data, **kwargs):
    """Evaluate ``expr`` on chunked ``data`` as a chunk step plus an aggregate step.

    The expression is divided by ``split`` into a chunk expression and an
    aggregate expression. ``atop`` is called twice: once to run the chunk
    expression over ``data``, and once to run ``_concatenate2`` (along
    ``expr.axis``) followed by the aggregate expression over the
    intermediate result.

    Parameters
    ----------
    expr
        Expression to compute; ``_child``, ``axis``, ``dshape`` and
        ``_leaves()`` are read from it.
    data
        Chunked input array; ``data.chunks`` is read.
    **kwargs
        Passed to ``compute_it`` in both steps.
    """
    leaf = expr._leaves()[0]
    chunk_dims = tuple(map(first, data.chunks)) + (leaf.dshape.measure,)
    chunk = symbol('chunk', DataShape(*chunk_dims))

    chunk_side, agg_side = split(expr._child, expr, chunk=chunk)
    chunk, chunk_expr = chunk_side
    agg, agg_expr = agg_side

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()

    # Step 1: chunk expression applied blockwise.
    chunk_func = curry(compute_it, chunk_expr, [chunk], **kwargs)
    tmp = atop(chunk_func, inds, data, inds, dtype=dtype)

    # Step 2: stitch intermediate blocks together, then aggregate.
    agg_func = compose(
        curry(compute_it, agg_expr, [agg], **kwargs),
        curry(_concatenate2, axes=expr.axis),
    )
    surviving = tuple(filter(lambda i: i not in expr.axis, inds))
    return atop(agg_func, surviving, tmp, inds, dtype=dtype)
def from_blaze(expr,
               deltas='auto',
               loader=None,
               resources=None,
               no_deltas_rule=_valid_no_deltas_rules[0]):
    """Create a Pipeline API object from a blaze expression.

    Parameters
    ----------
    expr : Expr
        The blaze expression to use.
    deltas : Expr or 'auto', optional
        The expression to use for the point in time adjustments.
        If the string 'auto' is passed, a deltas expr will be looked up
        by stepping up the expression tree and looking for another field
        with the name of ``expr`` + '_deltas'. If None is passed, no deltas
        will be used.
    loader : BlazeLoader, optional
        The blaze loader to attach this pipeline dataset to. If None is
        passed, the global blaze loader is used.
    resources : dict or any, optional
        The data to execute the blaze expressions against. This is used as
        the scope for ``bz.compute``.
    no_deltas_rule : {'warn', 'raise', 'ignore'}
        What should happen if ``deltas='auto'`` but no deltas can be found.
        'warn' says to raise a warning but continue.
        'raise' says to raise an exception if no deltas can be found.
        'ignore' says take no action and proceed with no deltas.

    Returns
    -------
    pipeline_api_obj : DataSet or BoundColumn
        Either a new dataset or bound column based on the shape of the expr
        passed in. If a table shaped expression is passed, this will return
        a ``DataSet`` that represents the whole table. If an array-like shape
        is passed, a ``BoundColumn`` on the dataset that would be constructed
        from passing the parent is returned.

    Raises
    ------
    TypeError
        If deltas are in use and ``expr`` contains a node rejected by
        ``is_invalid_deltas_node``; if ``expr`` is not one-dimensional; if
        ``expr`` is a scalar-measured column that is not a traversable node;
        if the dataset measure is not a record containing the
        ``AD_FIELD_NAME`` field; or if the deltas measure fields differ
        from the baseline measure fields.
    """
    deltas = _get_deltas(expr, deltas, no_deltas_rule)
    if deltas is not None:
        invalid_nodes = tuple(filter(is_invalid_deltas_node, expr._subterms()))
        if invalid_nodes:
            # De-duplicate the offending node type names and sort them so
            # the error message is the same on every run.
            invalid_names = sorted(
                set(map(compose(getname, type), invalid_nodes)),
            )
            raise TypeError(
                'expression with deltas may only contain (%s) nodes,'
                " found: %s" % (
                    ', '.join(map(getname, valid_deltas_node_types)),
                    ', '.join(invalid_names),
                ),
            )

    # Check if this is a single column out of a dataset.
    expr_ndim = bz.ndim(expr)
    if expr_ndim != 1:
        raise TypeError(
            'expression was not tabular or array-like,'
            ' %s dimensions: %d' % (
                'too many' if expr_ndim > 1 else 'not enough',
                expr_ndim,
            ),
        )

    single_column = None
    if isscalar(expr.dshape.measure):
        # This is a single column. Record which column we are to return
        # but create the entire dataset.
        single_column = rename = expr._name
        field_hit = False
        if not isinstance(expr, traversable_nodes):
            raise TypeError(
                "expression '%s' was array-like but not a simple field of"
                " some larger table" % str(expr),
            )
        # Walk up through the traversable nodes to the parent table. The
        # first Field encountered is stepped through; a second Field stops
        # the walk. ``rename`` tracks the name the column has at the level
        # we stop at so it can be relabelled back to ``single_column``.
        while isinstance(expr, traversable_nodes):
            if isinstance(expr, bz.expr.Field):
                if not field_hit:
                    field_hit = True
                else:
                    break
            rename = expr._name
            expr = expr._child
        dataset_expr = expr.relabel({rename: single_column})
    else:
        dataset_expr = expr

    measure = dataset_expr.dshape.measure
    if not isrecord(measure) or AD_FIELD_NAME not in measure.names:
        raise TypeError(
            "The dataset must be a collection of records with at least an"
            " '{ad}' field. Fields provided: '{fields}'\nhint: maybe you need"
            " to use `relabel` to change your field names".format(
                ad=AD_FIELD_NAME,
                fields=measure,
            ),
        )
    _check_datetime_field(AD_FIELD_NAME, measure)
    dataset_expr, deltas = _ensure_timestamp_field(dataset_expr, deltas)

    if deltas is not None and (sorted(deltas.dshape.measure.fields) !=
                               sorted(measure.fields)):
        raise TypeError(
            'baseline measure != deltas measure:\n%s != %s' % (
                measure,
                deltas.dshape.measure,
            ),
        )

    # Ensure that we have a data resource to execute the query against.
    _check_resources('dataset_expr', dataset_expr, resources)
    _check_resources('deltas', deltas, resources)

    # Create or retrieve the Pipeline API dataset.
    ds = new_dataset(dataset_expr, deltas)
    # Register our new dataset with the loader.
    (loader if loader is not None else global_loader)[ds] = ExprData(
        dataset_expr,
        deltas,
        resources,
    )
    if single_column is not None:
        # We were passed a single column, extract and return it.
        return getattr(ds, single_column)
    return ds
def elemwise_array(expr, *data, **kwargs):
    """Evaluate an elementwise expression blockwise over ``data`` with ``atop``.

    The output name is drawn from the ``names`` iterator defined outside
    this block. The output and each input are labelled with their own axis
    numbers in reverse order, so the last axis of every operand carries
    label ``0``.

    Parameters
    ----------
    expr
        The expression to evaluate; its ``_inputs`` are the leaves handed
        to ``compute_it``.
    *data
        The arrays to bind to the leaves, in order.
    **kwargs
        Forwarded to ``compute_it`` through ``curry``.
    """
    inputs = expr._inputs
    out_labels = tuple(reversed(range(ndim(expr))))
    block_func = curry(compute_it, expr, inputs, **kwargs)
    out_name = next(names)

    # Flatten into (array, labels, array, labels, ...).
    operands = []
    for arr in data:
        operands.append(arr)
        operands.append(tuple(reversed(range(ndim(arr)))))

    return atop(block_func, out_name, out_labels, *operands)