def test_array(self):
    """Check the array built by `fromiter` against the requested datashape.

    The array is created from `self.gen()` with datashape `self.ds` and
    parameters `self.p`.  Its NumPy dtype must equal the dtype derived
    from `self.ds`, and its shape must be one-dimensional with
    `self.count` elements.
    """
    built = fromiter(self.gen(), self.ds, self.p)

    # Convert both datashapes to their (shape, dtype) NumPy equivalents.
    built_shape, built_dtype = to_numpy(built.datashape)
    _requested_shape, requested_dtype = to_numpy(self.ds)

    self.assertEqual(built_dtype, requested_dtype)
    self.assertEqual(len(built_shape), 1)
    self.assertEqual(built_shape[0], self.count)
def run_test(in_memory, args): T = Terminal print "opening blaze arrays..." x = blaze.open(_persistent_array_names[0]) y = blaze.open(_persistent_array_names[1]) z = blaze.open(_persistent_array_names[2]) w = blaze.open(_persistent_array_names[3]) shape, dtype = blaze.to_numpy(x.datashape) print "***nelements:", shape[0] if in_memory: print "getting an in-memory version of blaze arrays..." params = blaze.params(clevel=9) t0 = time() x = blaze.array(x[:], params=params) y = blaze.array(y[:], params=params) z = blaze.array(z[:], params=params) w = blaze.array(w[:], params=params) print "conversion to blaze in-memory: %.3f" % (time() - t0) print "datashape is:", x.datashape print "evaluating expression with blir..." expr = (T(x) + T(y)).dot(T(2.0) * T(z) + T(2.0) * T(w)) if "print_expr" in args: print expr.gen_blir()[1] t_ce = time() result_ce = chunked_eval(expr, chunk_size=50000) t_ce = time() - t_ce print "blir chunked result is : %s in %f s" % (result_ce, t_ce) print "***blir time: %.3f" % t_ce # in numpy... t0 = time() x = x[:] y = y[:] z = z[:] w = w[:] print "conversion to numpy in-memory: %.3f" % (time() - t0) print "evaluating expression with numpy..." t_np = time() result_np = np.dot(x + y, 2.0 * z + 2.0 * w) t_np = time() - t_np print "numpy result is : %s in %f s" % (result_np, t_np) print "***numpy time: %.3f" % t_np print "**** %d, %.5f, %.5f" % (shape[0], t_ce, t_np)
def run_test(in_memory, args): T = Terminal print 'opening blaze arrays...' x = blaze.open(_persistent_array_names[0]) y = blaze.open(_persistent_array_names[1]) z = blaze.open(_persistent_array_names[2]) w = blaze.open(_persistent_array_names[3]) shape, dtype = blaze.to_numpy(x.datashape) print "***nelements:", shape[0] if in_memory: print 'getting an in-memory version of blaze arrays...' params = blaze.params(clevel=9) t0 = time() x = blaze.array(x[:], params=params) y = blaze.array(y[:], params=params) z = blaze.array(z[:], params=params) w = blaze.array(w[:], params=params) print "conversion to blaze in-memory: %.3f" % (time() - t0) print 'datashape is:', x.datashape print 'evaluating expression with blir...' expr = (T(x) + T(y)).dot(T(2.0) * T(z) + T(2.0) * T(w)) if 'print_expr' in args: print expr.gen_blir()[1] t_ce = time() result_ce = chunked_eval(expr, chunk_size=50000) t_ce = time() - t_ce print 'blir chunked result is : %s in %f s' % (result_ce, t_ce) print '***blir time: %.3f' % t_ce # in numpy... t0 = time() x = x[:] y = y[:] z = z[:] w = w[:] print "conversion to numpy in-memory: %.3f" % (time() - t0) print 'evaluating expression with numpy...' t_np = time() result_np = np.dot(x + y, 2.0 * z + 2.0 * w) t_np = time() - t_np print 'numpy result is : %s in %f s' % (result_np, t_np) print '***numpy time: %.3f' % t_np print '**** %d, %.5f, %.5f' % (shape[0], t_ce, t_np)
def chunked_dot(a, b, chunk_size=1024):
    """Compute the dot product of `a` and `b` one chunk at a time.

    Parameters
    ----------
    a : object with a `datashape` attribute
        Left operand.  Its length is taken from the first dimension of
        ``blaze.to_numpy(a.datashape)``.
    b : sliceable object
        Right operand; sliced with the same ranges as `a`.
    chunk_size : int
        Number of leading elements handled per `np.dot` call.  Must be
        positive.

    Returns
    -------
    The sum of the per-chunk `np.dot` results, starting from 0.0.

    Raises
    ------
    ValueError
        If `chunk_size` is not positive.
    """
    # A non-positive step would never move `offset` past the end and the
    # loop below would spin forever.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    a_shape, _ = blaze.to_numpy(a.datashape)
    total_size = a_shape[0]

    accum = 0.0
    offset = 0
    while offset < total_size:
        stop = offset + chunk_size
        accum += np.dot(a[offset:stop], b[offset:stop])
        offset = stop
    return accum
def create_persistent_arrays(elements, clevel):
    """Create one persistent array per name in `_persistent_array_names`.

    Every array holds the sine of `elements` evenly spaced points over
    ``[0, 10 * pi]``.

    Parameters
    ----------
    elements : int or string
        Number of elements; combined with ", float64" to form the
        datashape string.
    clevel :
        Forwarded unchanged to `_create_persistent_array`
        (presumably a compression level -- confirm against that helper).

    Returns
    -------
    None.  If the datashape string cannot be parsed, a message is printed
    and nothing is created.
    """
    spec = str(elements) + ", float64"
    try:
        dshape = blaze.dshape(spec)
    except Exception:
        # `elements` may be an int, so convert before concatenating.
        print(str(elements) + " is not a valid size for the arrays")
        return

    # The datashape is the same for every array; convert it only once.
    shape, _ = blaze.to_numpy(dshape)
    length = shape[0]

    for name in _persistent_array_names:
        # First create a numpy container (a fresh one for each array).
        samples = np.sin(np.linspace(0, 10 * math.pi, length))
        _create_persistent_array(name, samples, clevel)
def create_persistent_arrays(elements, clevel):
    """Create one persistent array per name in `_persistent_array_names`.

    Every array holds the sine of `elements` evenly spaced points over
    ``[0, 10 * pi]``.

    Parameters
    ----------
    elements : int or string
        Number of elements; combined with ', float64' to form the
        datashape string.
    clevel :
        Forwarded unchanged to `_create_persistent_array`
        (presumably a compression level -- confirm against that helper).

    Returns
    -------
    None.  If the datashape string cannot be parsed, a message is printed
    and nothing is created.
    """
    spec = str(elements) + ', float64'
    try:
        dshape = blaze.dshape(spec)
    except Exception:
        # `elements` may be an int, so convert before concatenating.
        print(str(elements) + ' is not a valid size for the arrays')
        return

    # The datashape is the same for every array; convert it only once.
    shape, _ = blaze.to_numpy(dshape)
    length = shape[0]

    for name in _persistent_array_names:
        # First create a numpy container (a fresh one for each array).
        samples = np.sin(np.linspace(0, 10 * math.pi, length))
        _create_persistent_array(name, samples, clevel)
def evaluate(expression, chunk_size, vm="python", out_flavor="blaze",
             user_dict=None, **kwargs):
    """
    evaluate(expression, chunk_size, vm='python', out_flavor='blaze',
             user_dict=None, **kwargs)

    Evaluate an `expression` and return the result.

    Parameters
    ----------
    expression : string
        A string forming an expression, like '2*a+3*b'.  The names used
        in it ('a' and 'b' here) are looked up in `user_dict`.
    chunk_size :
        Passed unchanged to `_eval_blocks` when at least one operand has
        a `datashape`; ignored when all operands are scalars.
    vm : string
        The virtual machine to be used in computations.  It can be
        'numexpr' or 'python' (the default).
    out_flavor : string
        The flavor for the `out` object.  It can be 'blaze' (the default)
        or 'numpy'.
    user_dict : dict, optional
        Maps the variable names in `expression` to their values.  It is
        copied, so the caller's dictionary is never modified.  Objects
        without a `datashape` attribute are treated as scalars.
    kwargs : dictionary
        Extra keyword arguments, forwarded to `_eval_blocks`.

    Returns
    -------
    out
        If no operand has a `datashape`, the direct result of `eval` or
        `numexpr.evaluate`; otherwise whatever `_eval_blocks` returns.

    Raises
    ------
    ValueError
        If `vm` or `out_flavor` is not one of the accepted values, or if
        the operands with a `datashape` differ in length.
    """
    if vm not in ("numexpr", "python"):
        raise ValueError("`vm` must be either 'numexpr' or 'python'")
    if out_flavor not in ("blaze", "numpy"):
        raise ValueError("`out_flavor` must be either 'blaze' or 'numpy'")

    # Work on a private copy: eval() inserts `__builtins__` into the dict
    # it receives as globals, which would otherwise leak into the
    # caller's dictionary.
    namespace = {} if user_dict is None else dict(user_dict)

    # Gather info about sizes and lengths
    typesize, vlen = 0, 1
    for var in namespace.values():
        if not hasattr(var, "datashape"):
            # scalar detection
            continue
        # blaze arrays
        shape, dtype = blaze.to_numpy(var.datashape)
        typesize += dtype.itemsize
        lvar = shape[0]
        if vlen > 1 and vlen != lvar:
            raise ValueError("arrays must have the same length")
        vlen = lvar

    if typesize == 0:
        # All scalars
        if vm == "python":
            # NOTE(security): eval() runs arbitrary code; `expression`
            # must come from a trusted source.
            return eval(expression, namespace)
        import numexpr
        return numexpr.evaluate(expression, local_dict=namespace)

    # NOTE(review): `chunk_size` is passed positionally here; confirm that
    # the `_eval_blocks` in scope accepts it as its second parameter.
    return _eval_blocks(expression, chunk_size, namespace, vlen, typesize,
                        vm, out_flavor, **kwargs)
def evaluate(expression, chunk_size, vm='python', out_flavor='blaze',
             user_dict=None, **kwargs):
    """
    evaluate(expression, chunk_size, vm='python', out_flavor='blaze',
             user_dict=None, **kwargs)

    Evaluate an `expression` and return the result.

    Parameters
    ----------
    expression : string
        A string forming an expression, like '2*a+3*b'.  The names used
        in it ('a' and 'b' here) are looked up in `user_dict`.
    chunk_size :
        Passed unchanged to `_eval_blocks` when at least one operand has
        a `datashape`; ignored when all operands are scalars.
    vm : string
        The virtual machine to be used in computations.  It can be
        'numexpr' or 'python' (the default).
    out_flavor : string
        The flavor for the `out` object.  It can be 'blaze' (the default)
        or 'numpy'.
    user_dict : dict, optional
        Maps the variable names in `expression` to their values.  It is
        copied, so the caller's dictionary is never modified.  Objects
        without a `datashape` attribute are treated as scalars.
    kwargs : dictionary
        Extra keyword arguments, forwarded to `_eval_blocks`.

    Returns
    -------
    out
        If no operand has a `datashape`, the direct result of `eval` or
        `numexpr.evaluate`; otherwise whatever `_eval_blocks` returns.

    Raises
    ------
    ValueError
        If `vm` or `out_flavor` is not one of the accepted values, or if
        the operands with a `datashape` differ in length.
    """
    if vm not in ('numexpr', 'python'):
        raise ValueError("`vm` must be either 'numexpr' or 'python'")
    if out_flavor not in ('blaze', 'numpy'):
        raise ValueError("`out_flavor` must be either 'blaze' or 'numpy'")

    # Work on a private copy: eval() inserts `__builtins__` into the dict
    # it receives as globals, which would otherwise leak into the
    # caller's dictionary.
    namespace = {} if user_dict is None else dict(user_dict)

    # Gather info about sizes and lengths
    typesize, vlen = 0, 1
    for var in namespace.values():
        if not hasattr(var, 'datashape'):
            # scalar detection
            continue
        # blaze arrays
        shape, dtype = blaze.to_numpy(var.datashape)
        typesize += dtype.itemsize
        lvar = shape[0]
        if vlen > 1 and vlen != lvar:
            raise ValueError("arrays must have the same length")
        vlen = lvar

    if typesize == 0:
        # All scalars
        if vm == 'python':
            # NOTE(security): eval() runs arbitrary code; `expression`
            # must come from a trusted source.
            return eval(expression, namespace)
        import numexpr
        return numexpr.evaluate(expression, local_dict=namespace)

    # NOTE(review): `chunk_size` is passed positionally here; confirm that
    # the `_eval_blocks` in scope accepts it as its second parameter.
    return _eval_blocks(expression, chunk_size, namespace, vlen, typesize,
                        vm, out_flavor, **kwargs)
def _eval_blocks(expression, vars, vlen, typesize, vm, out_flavor, **kwargs): """Perform the evaluation in blocks.""" # Compute the optimal block size (in elements) # The next is based on experiments with bench/ctable-query.py if vm == "numexpr": # If numexpr, make sure that operands fits in L3 chache bsize = 2**20 # 1 MB is common for L3 else: # If python, make sure that operands fits in L2 chache bsize = 2**17 # 256 KB is common for L2 bsize //= typesize # Evaluation seems more efficient if block size is a power of 2 bsize = 2 ** (int(math.log(bsize, 2))) if vlen < 100*1000: bsize //= 8 elif vlen < 1000*1000: bsize //= 4 elif vlen < 10*1000*1000: bsize //= 2 # Protection against too large atomsizes if bsize == 0: bsize = 1 vars_ = {} # Get temporaries for vars maxndims = 0 for name in vars.iterkeys(): var = vars[name] if hasattr(var, "datashape"): shape, dtype = blaze.to_numpy(var.datashape) ndims = len(shape) + len(dtype.shape) if ndims > maxndims: maxndims = ndims for i in xrange(0, vlen, bsize): # Get buffers for vars for name in vars.iterkeys(): var = vars[name] if hasattr(var, "datashape"): shape, dtype = blaze.to_numpy(var.datashape) vars_[name] = var[i:i+bsize] else: if hasattr(var, "__getitem__"): vars_[name] = var[:] else: vars_[name] = var # Perform the evaluation for this block if vm == "python": res_block = eval(expression, None, vars_) else: import numexpr res_block = numexpr.evaluate(expression, local_dict=vars_) if i == 0: # Detection of reduction operations scalar = False dim_reduction = False if len(res_block.shape) == 0: scalar = True result = res_block continue elif len(res_block.shape) < maxndims: dim_reduction = True result = res_block continue # Get a decent default for expectedlen if out_flavor == "blaze": nrows = kwargs.pop('expectedlen', vlen) result = blaze.array(res_block, **kwargs) else: out_shape = list(res_block.shape) out_shape[0] = vlen result = np.empty(out_shape, dtype=res_block.dtype) result[:bsize] = res_block else: if scalar 
or dim_reduction: result += res_block elif out_flavor == "blaze": result.append(res_block) else: result[i:i+bsize] = res_block # if isinstance(result, blaze.Array): # result.flush() if scalar: return result[()] return result