def build_transformer(name):
    """
    Build a transformer selected by a device name hint.

    :param name: device hint string; must contain 'numpy' or 'gpu'
    :return: a transformer instance created by the matching factory
    :raises RuntimeError: if the GPU transformer was requested but its
        backend cannot be imported
    :raises ValueError: if the name matches no known device
    """
    if 'numpy' in name:
        transformer = make_transformer_factory('numpy')()
    elif 'gpu' in name:
        try:
            # The GPU backend is optional; importing it is the availability
            # check before constructing the transformer.
            from ngraph.transformers.gputransform import GPUTransformer  # noqa
            transformer = make_transformer_factory('gpu')()
        except ImportError as err:
            # Was `assert False, ...` — asserts are stripped under `python -O`,
            # which would fall through and leave `transformer` unbound.
            raise RuntimeError(
                "Fatal: Unable to initialize GPU, "
                "but GPU transformer was requested.") from err
    else:
        raise ValueError("Unknown device!")
    return transformer
            # NOTE(review): this chunk begins mid-way through a cleanup method —
            # the matching `try:` and the enclosing `def` are outside this view;
            # the indentation below is reconstructed, not original.

            # Tear down optional native resources if they were initialized.
            if self.globals.get('mkldnn', None) is not None:
                self.globals.execute('mkldnn.close()')
            # Presumably Intel MLSL (scaling library) cleanup: free the native
            # allocation behind each device buffer, then finalize the library.
            if self.globals.get('mlsl_obj', None) is not None:
                for device_buffer in self.device_buffers:
                    self.globals.execute(
                        "mlsl_obj.free({}.__array_interface__['data'][0])".
                        format(device_buffer.ref_str))
                self.globals.execute('mlsl_obj.finalize()')
        except TypeError:
            # Best-effort cleanup — deliberately swallowed. NOTE(review):
            # likely guards against interpreter-teardown nulling out globals;
            # confirm before narrowing or removing.
            pass
        self.code = None

    def consume(self, buf_index, hostlist, devlist):
        '''
        This is currently used for Aeon dataloading -- need to set things up
        to do actual device buffer allocation

        :param buf_index: double-buffer slot to fill; must be 0 or 1
        :param hostlist: per-slot host arrays supplying the data
        :param devlist: per-slot destination arrays; a slot is lazily
            allocated with np.empty_like on first use
        '''
        assert 0 <= buf_index < 2, 'Can only double buffer'
        # Roll axis 0 to the last position before copying into the device slot.
        hb = np.rollaxis(hostlist[buf_index], 0, hostlist[buf_index].ndim)
        if devlist[buf_index] is None:
            devlist[buf_index] = np.empty_like(hb)
        devlist[buf_index][:] = hb

    def make_computation(self, computation):
        # Wrap the graph computation in a CPU-specific executable object.
        return CPUDeviceComputation(self, computation)


# Module-level side effect: register this transformer as the default factory.
set_transformer_factory(
    make_transformer_factory(CPUTransformer.transformer_name))
# print(self.code.filename) r = self.code.compile("op", globals()) self.model = r['Model']() self.model.conv_params = self.compute_code.conv_params self.model.pool_params = self.compute_code.pool_params self.model.conv_slices = self.compute_code.conv_slices self.model.pool_slices = self.compute_code.pool_slices for computation in self.computations: executor = getattr(self.model, computation.name) computation.executor = executor def allocate_storage(self): self.model.allocate() def consume(self, buf_index, hostlist, devlist): ''' This is currently used for Aeon dataloading -- need to set things up to do actual device buffer allocation ''' assert 0 <= buf_index < 2, 'Can only double buffer' hb = np.rollaxis(hostlist[buf_index], 0, hostlist[buf_index].ndim) if devlist[buf_index] is None: devlist[buf_index] = np.empty_like(hb) devlist[buf_index][:] = hb set_transformer_factory( make_transformer_factory(NumPyTransformer.transformer_name))