def test_running_multiple_variants(self):
    mod = asp_module.ASPModule()
    mod.add_function("foo",
                     ["void foo_1(){};", "void foo_2(){};"],
                     ["foo_1", "foo_2"])
    mod.foo()
    mod.foo()
    self.assertEqual(len(mod.db.get("foo")), 2)
def test_db_integration(self):
    m = asp_module.ASPModule()
    m.add_function("foo", "void foo(){return;}")
    m.foo()
    # Now let's check the db for what's inside
    self.assertEqual(len(m.db.get("foo")), 1)
def initialize_asp_mod(self):
    # Create ASP module
    GMM.asp_mod = asp_module.ASPModule(use_cuda=GMM.use_cuda,
                                       use_cilk=GMM.use_cilk)

    if GMM.use_cuda:
        print "GMM SPECIALIZER: USING CUDA"
        self.insert_base_code_into_listed_modules(['c++'])
        self.insert_non_rendered_code_into_cuda_module()
        self.insert_rendered_code_into_module('cuda')
        GMM.asp_mod.backends['cuda'].toolchain.cflags.extend(
            ["-Xcompiler", "-fPIC",
             "-arch=sm_%s%s" % GMM.platform_info['cuda']['capability']])
        # TODO: For now, must force ONLY cuda backend to compile
        GMM.asp_mod.backends['c++'].compilable = False
        #print GMM.asp_mod.generate()

    if GMM.use_cilk:
        print "GMM SPECIALIZER: USING CILK+"
        self.insert_base_code_into_listed_modules(['cilk'])
        self.insert_non_rendered_code_into_cilk_module()
        self.insert_rendered_code_into_module('cilk')
        GMM.asp_mod.backends['cilk'].toolchain.cc = 'icc'
        GMM.asp_mod.backends['cilk'].toolchain.cflags = ['-O2', '-gcc', '-ip', '-fPIC']

    # Setup toolchain
    from codepy.libraries import add_numpy, add_boost_python, add_cuda
    for name, mod in GMM.asp_mod.backends.iteritems():
        add_numpy(mod.toolchain)
        add_boost_python(mod.toolchain)
        if name in ['cuda']:
            add_cuda(mod.toolchain)
    return GMM.asp_mod
def test_adding_function(self):
    m = asp_module.ASPModule()
    m.add_function("foo", "void foo(){return;}")
    self.assertTrue(isinstance(m.specialized_functions["foo"],
                               asp_module.SpecializedFunction))
def test_adding_with_call_policy(self):
    mod = asp_module.ASPModule()
    # add a struct to the module
    mod.add_to_module("struct foo { int a; };\n")
    # we also want to expose the struct to Python so we can pass instances
    # back and forth
    mod.expose_class("foo")
    # add a function that returns a pointer to this arbitrary struct
    # we have to specify a call policy because we are returning a pointer to a C++ object
    mod.add_function(
        "get_foo",
        "struct foo* get_foo() { struct foo* f = new foo; f->a = 10; return f; }\n",
        call_policy="python_gc")
    # add a function that takes a foo and returns the int
    mod.add_function("get_int", "int get_int(struct foo* f) { return f->a; }")
    # take a look at the generated code
    # print mod.generate()
    # let's create a foo
    foo = mod.get_foo()
    # and now let's make sure that if we pass foo back to C++, it is the same instance
    self.assertEqual(mod.get_int(foo), 10)
def test_generate(self):
    a = asp_module.ASPModule()
    mock_backend = Mock()
    a.backends["c++"] = mock_backend
    a.generate()
    self.assertTrue(mock_backend.module.generate.called)
def test_helper_function(self):
    m = asp_module.ASPModule()
    m.add_helper_function("foo_helper", "PyObject* foo_helper(){Py_RETURN_TRUE;}")
    self.assertTrue("foo_helper" in m.specialized_functions)
    self.assertTrue(m.foo_helper())
def double_using_template(self, arr):
    import asp.codegen.templating.template as template
    mytemplate = template.Template(filename="templates/double_template.mako",
                                   disable_unicode=True)
    rendered = mytemplate.render(num_items=len(arr))

    import asp.jit.asp_module as asp_module
    mod = asp_module.ASPModule()
    # remember, must specify function name when using a string
    mod.add_function("double_in_c", rendered)
    return mod.double_in_c(arr)
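# A minimal, self-contained sketch of the same template->JIT flow as
# double_using_template above, using an inline Mako template instead of
# templates/double_template.mako (whose exact contents are not shown here and
# are an assumption); it renders C source for a trivial function and compiles
# it through ASPModule, just as add_function does with the rendered string above.
def double_template_sketch():
    from mako.template import Template
    import asp.jit.asp_module as asp_module

    # hypothetical template: the real double_template.mako presumably loops
    # over num_items and doubles each element of the input array
    tmpl = Template(text="int num_items() { return ${num_items}; }")
    rendered = tmpl.render(num_items=4)

    mod = asp_module.ASPModule()
    mod.add_function("num_items", rendered)  # name must match the C function
    return mod.num_items()  # expected to return 4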
def test_running_variants_with_unrunnable_inputs(self):
    mod = asp_module.ASPModule()
    mod.add_function("foo",
                     ["void foo_1(int a){};", "void foo_2(int a){};"],
                     ["foo_1", "foo_2"],
                     [lambda *args, **kwargs: True,
                      lambda *args, **kwargs: args[0] < 2])
    mod.foo(1)
    mod.foo(1)
    mod.foo(10)
    mod.foo(10)
    self.assertEqual(
        len(filter(lambda x: x[1] == u'foo_2', mod.db.get("foo"))), 1)
def map_using_trees(self, arr):
    operation_ast = ast_tools.parse_method(self.operation)
    expr_ast = operation_ast.body[0].body[0].value
    converter = Converter()
    expr_cpp = converter.visit(expr_ast)

    import asp.codegen.templating.template as template
    mytemplate = template.Template(filename="templates/map_template.mako",
                                   disable_unicode=True)
    rendered = mytemplate.render(num_items=len(arr), expr=expr_cpp)

    import asp.jit.asp_module as asp_module
    mod = asp_module.ASPModule()
    mod.add_function("map_in_c", rendered)
    return mod.map_in_c(arr)
def test_cuda(self):
    mod = asp_module.ASPModule(use_cuda=True)

    # create the host code
    mod.add_to_preamble("""
    #define N 10
    void add_launch(int*,int*,int*);
    """, backend="c++")
    mod.add_helper_function("foo_1", """
    int foo_1(){
      int a[N], b[N], c[N];
      int *dev_a, *dev_b, *dev_c;
      cudaMalloc( (void**)&dev_a, N * sizeof(int) );
      cudaMalloc( (void**)&dev_b, N * sizeof(int) );
      cudaMalloc( (void**)&dev_c, N * sizeof(int) );
      for (int i=0; i<N; i++) {
        a[i] = -i;
        b[i] = i * i;
      }
      cudaMemcpy( dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice );
      cudaMemcpy( dev_b, b, N * sizeof(int), cudaMemcpyHostToDevice );
      add_launch(dev_a, dev_b, dev_c);
      cudaMemcpy( c, dev_c, N * sizeof(int), cudaMemcpyDeviceToHost );
      cudaFree( dev_a );
      cudaFree( dev_b );
      cudaFree( dev_c );
      return 0;
    }""", backend="cuda")

    # create device code
    mod.add_to_module("""
    #define N 10
    __global__ void add( int *a, int *b, int *c ) {
      int tid = blockIdx.x;   // handle the data at this index
      if (tid < N)
        c[tid] = a[tid] + b[tid];
    }
    void add_launch(int *a, int *b, int *c) {
      add<<<N,1>>>( a, b, c );
    }
    """, backend='cuda')

    # test a call
    ret = mod.foo_1()
    self.assertTrue(ret == 0)
def __init__(self):
    self.rawinfo = []
    self.cuda_util_mod = asp_module.ASPModule(use_cuda=True)
    cuda_util_funcs = [
        ("""
        void set_device(int dev) {
          int GPUCount;
          cudaGetDeviceCount(&GPUCount);
          if(GPUCount == 0) {
            dev = 0;
          } else if (dev >= GPUCount) {
            dev = GPUCount-1;
          }
          cudaSetDevice(dev);
        }""", "set_device"),
        ("""
        boost::python::tuple device_compute_capability(int dev) {
          int major, minor;
          cuDeviceComputeCapability(&major, &minor, dev);
          return boost::python::make_tuple(major, minor);
        }""", "device_compute_capability"),
        ("""
        int get_device_count() {
          int count;
          cudaGetDeviceCount(&count);
          return count;
        }""", "get_device_count"),
        ("""
        int device_get_attribute( int attr, int dev) {
          int pi;
          cuDeviceGetAttribute(&pi, (CUdevice_attribute)attr, dev);
          return pi;
        }""", "device_get_attribute"),
        ("""
        size_t device_total_mem(int dev) {
          size_t bytes;
          cuDeviceTotalMem(&bytes, dev);
          return bytes;
        }""", "device_total_mem")]
    for fbody, fname in cuda_util_funcs:
        self.cuda_util_mod.add_helper_function(fname, fbody, backend='cuda')
    self.cuda_device_id = None
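# Hedged usage sketch for the helpers registered above. The owning class name
# (DeviceInfo) is an assumption; the calls themselves just mirror the
# add_helper_function names, which ASPModule exposes as Python methods.
def query_first_device_sketch():
    info = DeviceInfo()  # hypothetical: whatever class the __init__ above belongs to
    count = info.cuda_util_mod.get_device_count()      # wraps cudaGetDeviceCount
    if count > 0:
        info.cuda_util_mod.set_device(0)               # clamps to a valid device id
        major, minor = info.cuda_util_mod.device_compute_capability(0)
        return (count, major, minor)
    return (0, None, None)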
def map_using_trees(self, arr):
    import asp.codegen.templating.template as template
    import inspect
    import asp.codegen.python_ast as ast
    import asp.codegen.ast_tools as ast_tools

    src = inspect.getsource(self.operation)
    operation_ast = ast.parse(src.lstrip())
    return_ast = operation_ast.body[0]
    expr_ast = return_ast.body[0].value
    expr_cpp = ast_tools.ConvertAST().visit(expr_ast)

    mytemplate = template.Template(filename="templates/map_template.mako",
                                   disable_unicode=True)
    rendered = mytemplate.render(num_items=len(arr), expr=expr_cpp)

    import asp.jit.asp_module as asp_module
    mod = asp_module.ASPModule()
    mod.add_function("map_in_c", rendered)
    return mod.map_in_c(arr)
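# Hedged sketch of the kind of 'operation' method map_using_trees expects:
# inspect.getsource re-parses the method, operation_ast.body[0] is the
# FunctionDef, and the value of its first statement (the returned expression)
# is what ConvertAST translates into the C++ snippet passed to the template.
# The class name ArrayMap is an assumption for illustration.
class ArrayMap(object):
    def operation(self, x):
        return 2 * x + 5   # this expression becomes expr_cpp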
def initialize_asp_mod(self):
    # Create ASP module
    SVM.asp_mod = asp_module.ASPModule(use_cuda=SVM.use_cuda)

    if SVM.use_cuda:
        self.insert_cache_controller_code_into_listed_modules(['c++', 'cuda'])
        self.insert_base_code_into_listed_modules(['c++'])
        self.insert_non_rendered_code_into_module()
        self.insert_rendered_code_into_cuda_module()
        SVM.asp_mod.backends['cuda'].toolchain.cflags.extend(
            ["-Xcompiler", "-fPIC",
             "-arch=sm_%s%s" % SVM.platform_info['cuda']['capability']])
        SVM.asp_mod.backends['c++'].toolchain.cflags.extend(["-lcublas"])
        SVM.asp_mod.backends['c++'].compilable = False  # TODO: For now, must force ONLY cuda backend to compile
        #print SVM.asp_mod.generate()

    # Setup toolchain
    from codepy.libraries import add_numpy, add_boost_python, add_cuda
    for name, mod in SVM.asp_mod.backends.iteritems():
        add_numpy(mod.toolchain)
        add_boost_python(mod.toolchain)
        if name in ['cuda']:
            add_cuda(mod.toolchain)
    return SVM.asp_mod
def run_distributed(self, data):
    mod = asp_module.ASPModule(
        cache_dir="/root/spark/examples/target/scala-2.9.1.final/classes/",
        use_scala=True)
    scala_estimate = ast_tools.ConvertPyAST_ScalaAST().visit(self.estimate_ast)
    scala_reduce = ast_tools.ConvertPyAST_ScalaAST().visit(self.reduce_ast)
    scala_average = ast_tools.ConvertPyAST_ScalaAST().visit(self.average_ast)
    scala_gen = SourceGenerator(self.TYPE_DECS)
    rendered_scala_input_funcs = (scala_gen.to_source(scala_estimate) + '\n'
                                  + scala_gen.to_source(scala_reduce) + '\n'
                                  + scala_gen.to_source(scala_average))
    rendered_scala = self.prepend_scala_blb_core_funcs(rendered_scala_input_funcs)
    # should generate function signature for run here.. using codegen?
    rendered_scala_object = avro_backend.generate_scala_object("run", "", rendered_scala)
    # NOTE: must append "outer" to the function name above to get the classname,
    # because of how the scala_object is created by avro_backend
    mod.add_function("run_outer", rendered_scala_object, backend="scala")
    # NOTE: must add dependencies in make_dependency_jar so that slave nodes
    # will receive the proper files
    time_stamp = str(int(round(time.time() * 1000)))
    os.system('/root/BLB/distributed/make_dependency_jar '
              + '/root/BLB/distributed/dependencies/' + time_stamp)
    os.environ['DEPEND_LOC'] = '/root/BLB/distributed/dependencies/' + time_stamp + '/depend.jar'
    num_spark_tasks = self.get_num_spark_tasks()
    print 'Running Spark with', num_spark_tasks, 'tasks'
    return mod.run_outer(data, num_spark_tasks, self.dim,
                         self.num_subsamples, self.num_bootstraps,
                         self.subsample_len_exp)
def shadow_kernel(self, *args):
    if self.pure_python:
        return self.pure_python_kernel(*args)

    # FIXME: instead of doing this short-circuit, we should use the Asp
    # infrastructure to do it, by passing in a lambda that does this check

    # if already specialized to these sizes, just run
    if self.specialized_sizes and self.specialized_sizes == [y.shape for y in args]:
        debug_print("match!")
        self.mod.kernel(*[y.data for y in args])
        return

    # otherwise, do the first-run flow

    # ask asp infrastructure for machine and platform info, including if cilk+ is available
    # FIXME: implement. set self.with_cilk=true if cilk is available

    input_grids = args[0:-1]
    output_grid = args[-1]
    model = copy.deepcopy(self.model)
    model = StencilUnrollNeighborIter(model, input_grids, output_grid).run()

    # depending on whether cilk is available, we choose which converter to use
    if not self.with_cilk:
        Converter = StencilConvertAST
    else:
        Converter = StencilConvertASTCilk

    # generate variant with no unrolling, then generate variants for various unrollings
    base_variant = Converter(model, input_grids, output_grid).run()
    variants = [base_variant]
    variant_names = ["kernel"]

    # we only cache block if the size is large enough for blocking
    # or if the user has told us to
    if (len(args[0].shape) > 1 and args[0].shape[0] > 128):
        self.should_cacheblock = True
        self.block_sizes = [16, 32, 48, 64, 128, 160, 192, 256]
    else:
        self.should_cacheblock = False
        self.block_sizes = []

    if self.should_cacheblock and self.should_unroll:
        import itertools
        for b in list(set(itertools.permutations(self.block_sizes, len(args[0].shape) - 1))):
            for u in [1, 2, 4, 8]:
                # ensure the unrolling is valid for the given blocking
                #if b[len(b)-1] >= u:
                if args[0].shape[len(args[0].shape) - 1] >= u:
                    c = list(b)
                    c.append(1)
                    #variants.append(Converter(model, input_grids, output_grid, unroll_factor=u, block_factor=c).run())
                    variant = StencilOptimizeCpp(copy.deepcopy(base_variant),
                                                 output_grid.shape,
                                                 unroll_factor=u,
                                                 block_factor=c).run()
                    variants.append(variant)
                    variant_names.append("kernel_block_%s_unroll_%s" %
                                         ('_'.join([str(y) for y in c]), u))
                    debug_print("ADDING BLOCKED")

    if self.should_unroll:
        for x in [2, 4, 8, 16]:  #,32,64]:
            # FIXME: is this the right way to figure out valid unrollings?
            check_valid = max(map(
                lambda y: (y.shape[-1] - 2 * y.ghost_depth) % x,
                args))
            if check_valid == 0:
                debug_print("APPENDING VARIANT %s" % x)
                variants.append(StencilOptimizeCpp(copy.deepcopy(base_variant),
                                                   output_grid.shape,
                                                   unroll_factor=x).run())
                variant_names.append("kernel_unroll_%s" % x)

    debug_print(variant_names)

    from asp.jit import asp_module
    mod = self.mod = asp_module.ASPModule()
    self.add_libraries(mod)
    self.set_compiler_flags(mod)
    mod.add_function("kernel", variants, variant_names)

    # package arguments and do the call
    myargs = [y.data for y in args]
    mod.kernel(*myargs)

    # save parameter sizes for next run
    self.specialized_sizes = [x.shape for x in args]
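# Hedged usage sketch for the specializer above: shadow_kernel is the method
# that intercepts calls to a user-defined kernel(), so client code just defines
# a subclass and calls kernel() on grids. The StencilKernel / StencilGrid names,
# the import path, and the neighbors/interior_points API are assumptions based
# on the stencil specializer this method appears to belong to.
from stencil_kernel import StencilKernel, StencilGrid

class ExampleKernel(StencilKernel):
    def kernel(self, in_grid, out_grid):
        # accumulate each interior point's immediate neighbors into the output
        for x in out_grid.interior_points():
            for y in in_grid.neighbors(x, 1):
                out_grid[x] = out_grid[x] + in_grid[y]

in_grid = StencilGrid([8, 8])
out_grid = StencilGrid([8, 8])
ExampleKernel().kernel(in_grid, out_grid)   # routed through shadow_kernel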
def test_adding_multiple_variants(self):
    mod = asp_module.ASPModule()
    mod.add_function("foo",
                     ["void foo_1(){};", "void foo_2(){};"],
                     ["foo_1", "foo_2"])
    self.assertTrue("foo_1" in mod.specialized_functions["foo"].variant_names)
def build_mod(self, key):
    template_name = ''
    if self.with_openMP:
        template_name = 'blb_omp.mako'
    elif self.with_cilk:
        template_name = 'blb_cilk.mako'
    else:
        template_name = 'blb_template.mako'

    import asp.codegen.templating.template as template
    blb_template = template.Template(filename="templates/%s" % template_name,
                                     disable_unicode=True)
    impl_template = template.Template(filename="templates/blb_impl.mako",
                                      disable_unicode=True)

    # leading dimension of first data object tuple
    n_vecs = key[0][0][0]
    vec_n = int(pow(n_vecs, self.subsample_len_exp))

    impl_args = {}
    #impl_args = {'dim': self.dim}
    #impl_args['n_data'] = key[0]
    #impl_args['sub_n'] = int( pow( key[0], self.subsample_len_exp ) )
    impl_args['vec_n'] = vec_n
    impl_args['n_vecs'] = n_vecs
    impl_funcs = []
    impl_args['scalar_type'] = 'double'

    estimate_converter = BLBConverter(self.create_input_model(key, vec_n),
                                      input_size=vec_n, weighted=True)
    estimate_cpp = estimate_converter.render(self.estimate_ast)
    impl_args['classifier'] = estimate_cpp
    impl_funcs.extend(estimate_converter.desired_funcs)
    impl_args['bootstrap_model'] = estimate_converter.get_ret_model()

    arg_model = [model.clone() for model in estimate_converter.arg_model]
    for i in range(len(arg_model)):
        arg_model[i].dimensions = key[i][0]
    impl_args['arg_model'] = arg_model

    reduce_input = estimate_converter.get_ret_model().clone()
    reduce_input.dimensions.insert(0, self.num_bootstraps)
    reduce_input.set_len(self.num_bootstraps)
    reduce_converter = BLBConverter([reduce_input], input_size=self.num_bootstraps)
    reduce_cpp = reduce_converter.render(self.reduce_ast)
    impl_args['bootstrap_reducer'] = reduce_cpp
    impl_funcs.extend(reduce_converter.desired_funcs)
    impl_args['subsample_model'] = reduce_converter.get_ret_model()

    average_input = reduce_converter.get_ret_model().clone()
    average_input.dimensions.insert(0, self.num_subsamples)
    average_input.set_len(self.num_subsamples)
    average_converter = BLBConverter([average_input], input_size=self.num_subsamples)
    average_cpp = average_converter.render(self.average_ast)
    impl_args['subsample_reducer'] = average_cpp
    impl_funcs.extend(average_converter.desired_funcs)
    impl_args['average_model'] = average_converter.get_ret_model()

    impl_args['desired_funcs'] = set(impl_funcs)

    fwk_args = self.set_framework_args(key, impl_args.copy())
    rendered = blb_template.render(**fwk_args)

    """ === This was for non sejitizing code ===
    if self.compute_estimate in BLB.known_reducers:
        impl_args['use_classifier'] = self.compute_estimate
    else:
        impl_args['classifier'] = self.compute_estimate

    if self.reduce_bootstraps in BLB.known_reducers:
        impl_args['use_bootstrap_reducer'] = self.reduce_bootstraps
    else:
        impl_args['bootstrap_reducer'] = self.reduce_bootstraps

    if self.average in BLB.known_reducers:
        impl_args['use_subsample_reducer'] = self.average
    else:
        impl_args['subsample_reducer'] = self.average
    """

    rendered_impl = impl_template.render(**impl_args)

    import asp.jit.asp_module as asp_module
    mod = asp_module.ASPModule(specializer='BLB', cache_dir=cache_dir)
    mod.add_function('compute_estimate', rendered_impl)
    mod.add_function("compute_blb", rendered)
    self.set_compiler_flags(mod)
    self.set_includes(mod)

    f = open('blbout.cpp', 'w+')
    f.write(str(mod.backends['c++'].module.generate()))
    f.close()
    return mod
def test_adding_and_calling(self):
    m = asp_module.ASPModule()
    m.add_function("foo", "PyObject* foo(){Py_RETURN_TRUE;}")
    self.assertTrue(m.foo())