def init(self, input_width):
    """Allocate this layer's buffers for a given input width.

    Resolves the weight and bias initializers, creates the weights
    ``(units, input_width)`` and biases ``(units, 1)`` on the host, copies
    them to the device with ``array.to_device``, allocates a zeroed
    ``(batch_size, units)`` output array and checks the layer against the
    device's work-item limits.

    :param input_width: number of inputs feeding each unit of this layer
    :return: ``self.units`` (the output width of this layer)
    :raises ValueError: if ``units`` exceeds the first entry, or
        ``batch_size`` the second entry, of the device's
        ``max_work_item_sizes``
    """
    self.input_width = input_width

    # An initializer may be configured either as a ready-made Initializer
    # instance or as a factory that still has to be called.  Start from the
    # configured object so the instance case is handled too (previously the
    # local name was only bound in the factory case).
    weight_initializer = self.weight_initializer
    if not isinstance(weight_initializer, Initializer):
        weight_initializer = weight_initializer(self.input_width, self.units)
    bias_initializer = self.bias_initializer
    if not isinstance(bias_initializer, Initializer):
        bias_initializer = bias_initializer()

    self.weights_buf = weight_initializer((self.units, self.input_width))
    self.weights = array.to_device(self.queue, self.weights_buf)
    self.bias_buf = bias_initializer((self.units, 1))
    self.bias = array.to_device(self.queue, self.bias_buf)

    # should probably make this 2d so it can have dimensions
    # (output_width, batch_size)
    self.output = array.zeros(self.queue, (self.batch_size, self.units),
                              dtype=dtype)
    self.output_data = self.output.data

    # Widths are stored as cltypes.uint from here on.
    self.input_width = cltypes.uint(self.input_width)
    self.output_width = cltypes.uint(self.units)
    # NOTE(review): self-assignment kept from the original; it only has an
    # effect if `activation` is defined at class level - confirm and remove.
    self.activation = self.activation

    max_output, max_batch_size = self.queue.device.max_work_item_sizes[:2]
    if self.output_width > max_output:
        raise ValueError(
            f"Layer output cannot exceed {max_output}, you gave {self.output_width}"
        )
    if self.batch_size > max_batch_size:
        raise ValueError(
            f"Batch size cannot exceed {max_batch_size}, you gave {self.batch_size}"
        )
    return self.units
def inferSingleStepCL(self, pattern, weights):
    """
    Run the ``inferSingleStep`` kernel once and read back its predictions.

    Arguments handed to the kernel, in order:
        pattern      read-only device copy of ``pattern``
        weights      read-only device copy of ``weights``
        predictions  write-only buffer, one float per element of ``pattern``
        sums         read/write buffer of ``32 * len(pattern)`` bytes
        numBuckets   ``self._numBuckets`` as uint
        maxinput     ``self._maxinput`` as uint

    :param pattern: host array; one work item is launched per element
    :param weights: host array of weights
    :return: float array with ``len(pattern)`` entries
    """
    upload_flags = mf.READ_ONLY | mf.COPY_HOST_PTR
    pattern_dev = cl.Buffer(self._ctx, upload_flags, hostbuf=pattern)
    weights_dev = cl.Buffer(self._ctx, upload_flags, hostbuf=weights)

    item_count = len(pattern)
    predictions = np.empty(item_count, dtype=cltypes.float)
    predictions_dev = cl.Buffer(self._ctx, mf.WRITE_ONLY, predictions.nbytes)
    sums_dev = cl.Buffer(self._ctx, mf.READ_WRITE, 32 * item_count)

    global_size = (pattern.shape[0], )
    bucket_count = cltypes.uint(self._numBuckets)
    max_input = cltypes.uint(self._maxinput)
    self._prg.inferSingleStep(self._queue, global_size, None,
                              pattern_dev, weights_dev, predictions_dev,
                              sums_dev, bucket_count, max_input)

    # Wait for the device-to-host copy to finish before returning.
    copy_done = cl.enqueue_copy(self._queue, predictions, predictions_dev)
    copy_done.wait()
    return predictions
def compute(self, recordNum, pattern, bucketIdx, actValue, learn, infer):
    """
    Computes 1 step on the device.

    The active-bit indices are uploaded, the per-bit activation counters are
    updated, and the ``learn`` and/or ``infer`` kernels are enqueued, each
    one waiting on the step before it.

    :param recordNum: record number (not used by this method)
    :param pattern: indices of active columns in the TM layer
    :param bucketIdx: index of the bucket the current value falls into
    :param actValue: actual value (not used by this method)
    :param learn: if truthy, enqueue the ``learn`` kernel
    :param infer: if truthy, enqueue the ``infer`` kernel and read the
        predictions back
    :return: dict; when ``infer`` is truthy it maps ``1`` to a copy of the
        predictions array, otherwise it is empty
    """
    pattern = np.array(pattern, dtype=cltypes.uint)
    if not self._init_buffers:
        self._setup_buffers(pattern)

    # enqueue_copy replaces the deprecated enqueue_write_buffer.
    ev_copy_pattern = cl.enqueue_copy(self._queue, self.cl_activeBitIdx,
                                      pattern)

    # update bit activations on device side
    ev_update_bit = self._prg.update_bit_activations(
        self._queue, (pattern.size,), None,
        self.cl_bit_activations, self.cl_activeBitIdx,
        wait_for=[ev_copy_pattern])

    # Events the next kernel must wait for.  Starting from the activation
    # update keeps the dependency chain intact even when learning is off
    # (previously `infer` was enqueued with wait_for=None in that case).
    pending = [ev_update_bit]
    if learn:
        pending = [self._prg.learn(
            self._queue, (self.step_count * pattern.size,), None,
            self.cl_activeBitIdx, self.cl_table_average,
            self.cl_table_counts, self.alpha, self.actValueAlpha,
            cltypes.uint(bucketIdx), self._numBuckets,
            wait_for=pending)]

    multiStepPredictions = {}
    if infer:
        # Kernel signature:
        #   const __global float* averages,
        #   const __global uint* counts,
        #   const __global uint* activeBitIdx,
        #   __global float2* predictions,         // the array of predictions
        #   __global const uint* bitActivations,  // the number of times each bit has been active
        #   uint const activeBits
        # One work item per bucket.
        ev_infer = self._prg.infer(
            self._queue, (self._numBuckets,), None,
            self.cl_table_average, self.cl_table_counts,
            self.cl_activeBitIdx, self.cl_predictions,
            self.cl_bit_activations, cltypes.uint(pattern.size),
            wait_for=pending)
        cl.enqueue_copy(self._queue, self._predictions, self.cl_predictions,
                        wait_for=[ev_infer]).wait()
        # Predictions are stored under step 1 only.
        multiStepPredictions[1] = self._predictions.copy()

    # NOTE(review): the original layout was lost; this counter update is
    # assumed to run on every call - confirm.
    self.bucket_activations[bucketIdx] += 1
    return multiStepPredictions
def _get_overlap_score_loop_bin(self, encoding):
    """
    Returns an array with boosted and non-boosted scores as a vector

    Kernel signature:
        overlap_loop(
            __constant int64* activeBits,          // active bits in sorted order
            __constant synapse_struct* synapses,   // all the synapses
            __global uint2* overlaps,              // columns to store overlap scores
            __constant float* boostFactors,        // boost values for columns
            const float synPermConnected,
            const int synapsesPerColumn,
            const uint numActiveBits
        )

    :param encoding: the encoded data; positions equal to 1 are the active
        bits
    :return: uint2 array with one entry per column
    """
    # The kernel reads int64 indices; np.where returns the platform's index
    # type, so make the width explicit (no copy when it already matches).
    active_bits = np.where(encoding == 1)[0].astype(cltypes.long, copy=False)
    # enqueue_copy replaces the deprecated enqueue_write_buffer.
    cl.enqueue_copy(self._queue, self.cl_active_bits, active_bits)

    cl_synapses = self._get_cl_synapses_buffer()
    cl_boostFactors = self._get_cl_boost_factor_buffer()

    # array of overlap and boosted overlap scores
    overlap = np.zeros(self.columnCount, dtype=cltypes.uint2)
    cl_overlap = cl.Buffer(self._ctx, mf.WRITE_ONLY, overlap.nbytes)

    # One work item per column.
    self.prog.overlap_loop_bin(
        self._queue, (self.columnCount, ), None,
        self.cl_active_bits, cl_synapses, cl_overlap, cl_boostFactors,
        self.synPermConnected, self.synapsesPerColumn,
        cltypes.uint(active_bits.size)).wait()
    cl.enqueue_copy(self._queue, overlap, cl_overlap).wait()
    return overlap
def __init__(self,
             queue,
             activationThreshold=14,
             cellsPerColumn=32,
             columnCount=2048,
             globalDecay=0.0,
             initialPerm=0.21,
             inputWidth=2048,
             maxAge=0,
             maxSegmentsPerCell=128,
             maxSynapsesPerSegment=32,
             minThreshold=11,
             newSynapseCount=20,
             outputType='normal',
             pamLength=3,
             permanenceDec=0.1,
             permanenceInc=0.1,
             seed=1960,
             temporalImp='cl',
             verbosity=0):
    """
    Store the configuration and set up the OpenCL buffers.

    Integer parameters are stored as ``cltypes.uint`` and the decay and
    permanence parameters as ``cltypes.float``.  ``columnCount``,
    ``inputWidth``, ``outputType`` and ``verbosity`` are stored as passed.

    :param queue: OpenCL command queue; its context is stored as ``_ctx``
    :param seed: seed passed to ``np.random.seed``
    :param temporalImp: must be ``'cl'``
    :raises ValueError: if ``temporalImp`` is not ``'cl'``
    """
    if temporalImp != 'cl':
        raise ValueError('This implementation only supports OpenCL')

    self.activationThreshold = cltypes.uint(activationThreshold)
    self.cellsPerColumn = cltypes.uint(cellsPerColumn)
    self.globalDecay = cltypes.float(globalDecay)
    self.initialPerm = cltypes.float(initialPerm)
    self.maxAge = cltypes.uint(maxAge)
    self.maxSegmentsPerCell = cltypes.uint(maxSegmentsPerCell)
    self.maxSynapsesPerSegment = cltypes.uint(maxSynapsesPerSegment)
    self.minThreshold = cltypes.uint(minThreshold)
    self.newSynapseCount = cltypes.uint(newSynapseCount)
    self.outputType = outputType
    self.pamLength = cltypes.uint(pamLength)
    self.permanenceDec = cltypes.float(permanenceDec)
    self.permanenceInc = cltypes.float(permanenceInc)
    self.verbosity = verbosity

    # The original stored cltypes.uint(columnCount) and later overwrote it
    # with the raw argument; only the surviving assignment is kept so the
    # attribute has the same value as before.
    self.columnCount = columnCount
    self.inputWidth = inputWidth

    self._queue = queue
    self._ctx = queue.context

    # Seed once, immediately before the buffers are set up (the original
    # seeded twice with nothing random in between).
    np.random.seed(seed)
    self._setup_cl_buffers()
def __init__(self,
             queue,
             numBuckets,
             steps=None,
             bits=2048,
             alpha=0.001,
             actValueAlpha=0.3,
             verbosity=False):
    """
    Build the OpenCL program and initialise the host-side state.

    :param queue: OpenCL command queue; its context is used to build the
        program and is stored as ``_ctx``
    :param numBuckets: number of buckets (stored as ``cltypes.uint``)
    :param steps: sequence of prediction steps; defaults to ``[1]``
    :param bits: number of bits in the input
    :param alpha: stored as ``cltypes.float``
    :param actValueAlpha: stored as ``cltypes.float``
    :param verbosity: stored as ``_verbose``
    """
    # A list literal as default would be one object shared by every
    # instance created without `steps`; build a fresh list per instance.
    if steps is None:
        steps = [1]

    self._prg = cl.Program(queue.context, kernel_src).build()
    self._learn_iteration = 0
    self.bit_activations = np.zeros(bits, dtype=cltypes.uint)
    self.bucket_activations = np.zeros(numBuckets, dtype=cltypes.uint)
    self.steps = steps
    self.step_count = len(steps)
    self.alpha = cltypes.float(alpha)
    self.actValueAlpha = cltypes.float(actValueAlpha)
    self.bits = bits  # number of bits in the input
    self._queue = queue  # the opencl queue
    self._ctx = queue.context  # the opencl context
    self._numBuckets = cltypes.uint(numBuckets)
    self._verbose = verbosity
    # Device buffers are created later; this flag starts out False.
    self._init_buffers = False
def run(ctx, src):
    """Run the kernel in ``src`` over a 1024x512 float image.

    The kernel receives the image followed by width, height and depth as
    uints.  The value returned by ``run_kernel`` is printed as seconds next
    to the file name of ``src``.

    :param ctx: passed through to ``run_kernel``
    :param src: path of the kernel source file
    :return: 1-tuple holding the ``(512, 1024)`` image array
    """
    width, height, depth = 1024, 512, 1024
    img = np.zeros((height, width), dtype=cltypes.float)

    image_arg = Mem(img)
    scalar_args = [cltypes.uint(value) for value in (width, height, depth)]
    elapsed = run_kernel(ctx, src, (width, height), image_arg, *scalar_args)

    # An ASCII preview of img[::16, ::16] ("@" where z > 0.5, "." otherwise)
    # was disabled in the original and stays disabled here.

    file_name = os.path.split(src)[1]
    print("\t{:.3f} sec: {}".format(elapsed, file_name))
    return (img, )
def __init__(self, dims):
    """
    Set up the OpenCL context, Mandelbrot kernel and colour look-up table.

    :param dims: sequence whose first two entries are width and height
    """
    self.width = dims[0]
    self.height = dims[1]

    # Imported here and kept on the instance, as in the original.
    import time

    import numpy as np
    import pyopencl as cl
    from matplotlib import cm
    from pyopencl import cltypes
    self.cm = cm
    self.np = np
    self.cl = cl
    self.time = time

    # The original always used the second platform, which raises IndexError
    # on machines exposing a single one.  Keep preferring the second, fall
    # back to the first.
    platforms = cl.get_platforms()
    platform = platforms[1] if len(platforms) > 1 else platforms[0]
    self.ctx = cl.Context([platform.get_devices()[0]])
    self.queue = cl.CommandQueue(self.ctx)

    self.prg = cl.Program(
        self.ctx, """
#pragma OPENCL EXTENSION cl_khr_byte_addressable_store : enable
__kernel void mandelbrot(__global float2 *q,
                         __constant uchar4 *lut,
                         __global uchar4 *output,
                         __global uint* output2,
                         uint const maxiter)
{
    const int gid = get_global_id(0);
    float real = q[gid].x;
    float imag = q[gid].y;
    output[gid] = (uchar4)(0,0,0,0);
    output2[gid] = 0;
    for(uint curiter = 0; curiter < maxiter; curiter++) {
        float real2 = real*real, imag2 = imag*imag;
        if (real2 + imag2 > 4.0f) {
            output[gid] = lut[curiter];
            output2[gid] = curiter;
            return;
        }
        imag = 2 * real*imag + q[gid].y;
        real = real2 - imag2 + q[gid].x;
    }
}
""").build()

    self.centerx = (-0.74877 + -0.74872) / 2
    self.centery = (0.06505 + 0.06510) / 2
    self.padding = 2
    self.maxiter = cltypes.uint(64)
    self.update_pos()

    # Colour look-up table: one uchar4 per iteration count.  Only the first
    # three components are filled from the 'gnuplot2' colormap; the fourth
    # stays 0.
    cmap = self.cm.get_cmap('gnuplot2', self.maxiter)
    cols = [(np.array(cmap(i)[:-1]) * 255).astype(cltypes.uchar)
            for i in range(self.maxiter)]
    self.lut = np.zeros((self.maxiter, ), cltypes.uchar4)
    for idx, rgb in enumerate(cols):
        self.lut[idx][0] = rgb[0]
        self.lut[idx][1] = rgb[1]
        self.lut[idx][2] = rgb[2]
    self.lut_opencl = cl.Buffer(
        self.ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=self.lut)
], dtype=cltypes.float) colors_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=colors) color_width = 8.0 # Read source file with open(src_path, "r") as f: src = f.read() # Build program prg = cl.Program(ctx, src).build() # Execute OpenCL kernel on the device prg.render(queue, (width, height), None, image_buf, map_buf, colors_buf, *[cltypes.uint(x) for x in [width, height, depth, ssf]], cltypes.uint(len(colors)), cltypes.float(color_width)) # Copy rendered image to host cl.enqueue_copy(queue, image, image_buf) # Flush queue queue.flush() queue.finish() # Draw low-resolution image in terminal for row in np.mean(image[::32, ::16], axis=2): for x in row: print("@" if x > 1e-4 else ".", end="") print()
def compute(self, recordNum, pattern, classification, learn, infer):
    """
    Computes 1 step.

    NOTE(review): the original indentation of this method was lost, so the
    nesting below is a reconstruction.  As laid out here the method returns
    right after the optional inference call and everything after that
    ``return`` is unreachable - confirm against the original file.

    :param recordNum: record number; stored with the pattern in
        ``self._patternNZHistory``
    :param pattern: indices of active columns in the TM layer
    :param classification: dict read for the keys ``'bucketIdx'`` and
        ``'actValue'``
    :param learn: learning flag
    :param infer: if truthy, ``self.infer(pattern, classification)`` is
        called and its result returned
    :return: the result of ``self.infer`` when ``infer`` is truthy,
        otherwise ``None``
    """
    if self.verbosity:
        print(" recordNum:", recordNum)
        print(" patternNZ (%d):" % len(pattern), pattern)
        print(" classificationIn:", classification)
    bucketIdx, actValue = classification['bucketIdx'], classification['actValue']
    pattern = np.array(pattern).astype(cltypes.uint)
    self._patternNZHistory.append((recordNum, pattern))
    retval = None
    if infer:
        retval = self.infer(pattern, classification)
    # NOTE(review): position of this return is assumed (see docstring).
    return retval
    if learn and bucketIdx is not None:
        cl_activeBitIdx = cl.Buffer(self._ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=pattern)
        # learnRecordNum, learnPattern, error and targetDist are assigned
        # but never read below.
        for learnRecordNum, learnPattern in self._patternNZHistory:
            error = dict()
            targetDist = np.zeros(self._numBuckets + 1, dtype=cltypes.float)
            targetDist[bucketIdx] = 1.0
            # NOTE(review): dict.iteritems() exists only on Python 2.
            for step, table in self._weights.iteritems():
                # print("old table")
                # self._show_table(table)
                """ int* activeBitIdx float2 *table, // x=histogram, y=moving average float const alpha, // moving average alpha float const actualValue, // actual input value int const bucketIdx, // bucket that actualValue falls into int const bucketCount, bool const learn, bool const infer, __global float *predictions """
                new_table = table.copy()
                cl_new_table = cl.Buffer(self._ctx, mf.WRITE_ONLY | mf.COPY_HOST_PTR, hostbuf=new_table)
                cl_table = cl.Buffer(self._ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=table)
                # This re-creates the same buffer that was just created above.
                if learn:
                    cl_table = cl.Buffer(self._ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=table)
                if infer:
                    predictions = np.zeros(self._numBuckets, dtype=cl.array.vec.float2)
                    cl_predictions = cl.Buffer(self._ctx, mf.READ_WRITE, predictions.nbytes)
                else:
                    # 1-byte placeholder so the kernel still gets a buffer argument.
                    cl_predictions = cl.Buffer(self._ctx, mf.WRITE_ONLY, 1)
                # One work item per entry of `pattern`.
                self._prg.infer_compute(self._queue, (pattern.shape[0], ), None, cl_activeBitIdx, cl_table, cl_new_table,
                                        cltypes.float(self.actValueAlpha), cltypes.float(actValue), cltypes.uint(bucketIdx), cltypes.uint(self._numBuckets), cltypes.char(learn), cltypes.char(infer), cl_predictions)
                if learn:
                    # Copies the updated table into self.steps[step].
                    cl.enqueue_copy(self._queue, self.steps[step], cl_new_table).wait()
                if infer:
                    cl.enqueue_copy(self._queue, predictions, cl_predictions).wait()
                    print("Activations", self.bucket_activations)
                    # NOTE(review): multiStepPredictions is never assigned
                    # in this method, so this would raise NameError if reached.
                    multiStepPredictions[step] = predictions['y'] / len(pattern)  # the probability for each bucket
                    print("Actual Values", multiStepPredictions['actualValues'])
                    print("Probability", multiStepPredictions[step])
        # NOTE(review): nesting level of this counter update is assumed.
        self.bucket_activations[bucketIdx] += 1
    return multiStepPredictions
def train(self,
          epochs: int,
          loss: Loss,
          optimizer: Optimizer,
          x_train,
          y_train,
          x_test,
          y_test,
          x_validation=None,
          y_validation=None,
          batch_size: int = 1,
          shuffle: bool = True,
          validation_pct=None,
          validation_method='cross-validation',
          callbacks=()):
    """
    Train the network for a number of epochs.

    :param epochs: number of epochs to run
    :param loss: a loss function; ``loss.cpu(y, output)`` is recorded for
        every batch
    :param optimizer: the optimizer to use; called as
        ``optimizer(loss, self, batch_x, batch_y)`` after each forward pass
    :param x_train: a 2D array of shape (rows, features)
    :param y_train: a 2d array of shape (rows, output features),
        output_features is the number of values we want to predict
    :param x_test: testing data inputs (currently unused)
    :param y_test: testing data true values (currently unused)
    :param x_validation: optional validation inputs
    :param y_validation: optional validation true values
    :param batch_size: rows per batch; must divide the number of training
        rows
    :param shuffle: shuffle the training rows at the start of each epoch
    :param validation_pct: fraction (0 <= pct < 1) of the training set to
        set aside for validation
    :param validation_method: a string to determine which validation method
        to use: 'holdout','cross-validation'
    :param callbacks: callables invoked with the losses dict
    :return: dict with the keys 'batch', 'validation' and 'testing'; only
        'batch' is filled at present
    :raises ValueError: on inconsistent validation arguments, mismatched
        x/y lengths, a row count not divisible by ``batch_size``, or an
        input width different from the first layer's

    For example, our input might be:

        x_train = [
            [0,1,1],
            [0,2,1],
            [1,2,1],
            [0,3,4],
        ]

    That is 4 rows with 3 features each, we might do a binary
    classification on this:

        y_train = [
            [0,1],
            [0,1],
            [1,0],
            [0,1]
        ]

    That is, each training input maps to one of these.
    All this will be copied to the device.

    Validation methods are:
    1. Specify x_validation,y_validation and the same provided dataset will
       be used to validate every epoch
    2. Specify validation_pct to determine how much of the training set
       will be set aside as validation. Specify validation_method to
       determine which method to use:
       * holdout: the same subset of x_train is used to validate each epoch
       * cross-validation: at the start of each epoch a random sample of
         x_train/y_train is set aside
    """
    if validation_pct is not None and x_validation is not None and y_validation is not None:
        raise ValueError(
            "Please set either validation_pct or (x_validation,y_validation)"
        )
    # Exactly one of the two being None means the pair is incomplete.  (The
    # original chained comparison never looked at y_validation.)
    if (x_validation is None) != (y_validation is None):
        raise ValueError("Please set both (x_validation and y_validation)")

    x_train = x_train.astype(dtype)
    y_train = y_train.astype(dtype)

    # NOTE(review): x_val_gpu / y_val_gpu / validation_user are prepared for
    # the validation pass, which is not implemented yet (see TODO below).
    if validation_pct:
        if not 0 <= validation_pct < 1:
            raise ValueError(
                "Validation_pct must be in range 0 <= val% < 1")
        # slice off the last validation_pct from x_train,y_train
        training_samples = int(x_train.shape[0] * (1 - validation_pct))
        validation_samples = int(x_train.shape[0] * validation_pct)
        if validation_method == 'holdout':
            print(
                f"Holding out last {validation_samples} samples of training data for validation"
            )
            # Take the validation rows BEFORE truncating the training set;
            # slicing the truncated arrays would give an empty result.
            x_validation = x_train[training_samples:]
            y_validation = y_train[training_samples:]
            x_train = x_train[:training_samples]
            y_train = y_train[:training_samples]
            x_val_gpu = array.to_device(self.queue, x_validation)
            y_val_gpu = array.to_device(self.queue, y_validation)
        elif validation_method == 'cross-validation':
            print(
                f"Using cross-validation on last {validation_samples}")
        else:
            raise ValueError("Invalid validation method")
        validation_user = False
    elif x_validation is not None and y_validation is not None:
        print("User provided validation")
        x_validation = x_validation.astype(dtype)
        y_validation = y_validation.astype(dtype)
        x_val_gpu = array.to_device(self.queue, x_validation)
        y_val_gpu = array.to_device(self.queue, y_validation)
        validation_samples = len(x_validation)
        training_samples = x_train.shape[0]
        validation_user = True
    else:
        training_samples = x_train.shape[0]

    if len(x_train) != len(y_train):
        raise ValueError("X and Y for test/train must be same length")
    if training_samples % batch_size != 0:
        raise ValueError(
            "Training dataset must have rows divisible by batch size")

    input_features = cltypes.uint(x_train.shape[1])
    output_features = cltypes.uint(y_train.shape[1])
    if input_features != self.layers[0].input_width:
        raise ValueError(
            f"Input features (provided={input_features}) must be the same as layer_0 input width (required={self.layers[0].input_width})"
        )

    # Report size and shape of every dataset that is present.
    for dn, ds in (("x_train", x_train), ("y_train", y_train),
                   ("x_validation", x_validation),
                   ("y_validation", y_validation)):
        if ds is None:
            continue
        print("{}\n\tsize={}\n\tshape={}".format(
            dn, humanize.naturalsize(ds.nbytes), ds.shape))

    # Just copy all training data to the device
    x_train_gpu = array.to_device(self.queue, x_train)
    y_train_gpu = array.to_device(self.queue, y_train)
    # should probably check that our data won't exceed available device
    # memory, transparently queue up more data once it's been used

    losses = {'batch': [], 'validation': [], 'testing': []}
    batches_per_epoch = training_samples // batch_size
    for _ in tqdm(range(epochs), desc='Epoch: ', position=0):
        # shuffle the rows
        if shuffle:
            self.shuffle(x_train_gpu.data, y_train_gpu.data,
                         training_samples, input_features, output_features)

        for batch_no in tqdm(range(batches_per_epoch),
                             desc='Batch: ',
                             position=1,
                             unit=' batch'):
            # Plain Python ints for the slice bounds: no 32-bit wrap-around.
            start = batch_no * batch_size
            stop = start + batch_size
            batch_x_gpu = x_train_gpu[start:stop]
            batch_y_gpu = y_train_gpu[start:stop]

            output = self.forward(batch_x_gpu, verbose=False)
            loss_val = loss.cpu(batch_y_gpu, output)
            losses['batch'].append(loss_val)
            optimizer(loss, self, batch_x_gpu, batch_y_gpu)

        # TODO: run the network on the validation set (a single batch of
        # validation_samples rows) and on x_test/y_test, appending to
        # losses['validation'] and losses['testing'].

        # NOTE(review): callbacks are assumed to fire once per epoch; the
        # original layout was lost - confirm.
        for c in callbacks:
            c(losses)
    return losses
def __init__(self,
             queue,
             columnCount=2048,
             globalInhibition=1,
             inputWidth=500,
             inputActive=33,
             boostStrength=0.0,
             numActiveColumnsPerInhArea=40,
             potentialPct=.5,
             stimulusThreshold=0,
             seed=1956,
             dutyCyclePeriod=1000,
             spVerbosity=0,
             spatialImp='cl',
             synPermActiveInc=0.05,
             synPermConnected=0.10,
             synPermInactiveDec=0.008):
    """
    Initialise the OpenCL spatial pooler: configuration, random synapses,
    the compiled program and the device buffers.

    :param queue: OpenCL command queue; its context is stored as ``_ctx``
    :param columnCount: number of columns
    :param globalInhibition: must be 1
    :param inputWidth: number of input bits
    :param inputActive: length of the host/device active-bit buffers
    :param boostStrength: stored as given
    :param numActiveColumnsPerInhArea: stored (as uint) under the attribute
        name ``numActiveColumnPerInhArea``
    :param potentialPct: each column gets ``inputWidth * potentialPct``
        synapses (truncated to uint)
    :param stimulusThreshold: stored as uint
    :param seed: seed passed to ``np.random.seed``
    :param dutyCyclePeriod: stored as uint
    :param spVerbosity: >= 1 prints the parameters
    :param spatialImp: must be ``'cl'``
    :param synPermActiveInc: stored as float
    :param synPermConnected: stored as float; also the mean of the initial
        permanence distribution
    :param synPermInactiveDec: stored as float
    :raises ValueError: if ``spatialImp`` is not ``'cl'`` or
        ``globalInhibition`` is not 1
    """
    if spatialImp != 'cl':
        raise ValueError(
            'This implementation only supports OpenCL Spatial Pooler')
    if globalInhibition != 1:
        raise ValueError(
            'This implementation does not support local inhibition')

    self.columnCount = cltypes.uint(columnCount)
    self.globalInhibition = globalInhibition
    self.inputWidth = inputWidth
    self.boostStrength = boostStrength
    self.numActiveColumnPerInhArea = cltypes.uint(
        numActiveColumnsPerInhArea)
    self.potentialPct = cltypes.float(potentialPct)
    np.random.seed(seed)
    self.verbosity = spVerbosity
    self.synPermActiveInc = cltypes.float(synPermActiveInc)
    self.synPermConnected = cltypes.float(synPermConnected)
    self.synPermInactiveDec = cltypes.float(synPermInactiveDec)

    # store the TM as an array of int, either on or off
    self.columns = np.zeros(columnCount, dtype=cltypes.uint)
    self.synapsesPerColumn = cltypes.uint(inputWidth * potentialPct)
    self._stimulusThreshold = cltypes.uint(stimulusThreshold)
    self._dutyCyclePeriod = cltypes.uint(dutyCyclePeriod)
    self._activeDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._overlapDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._minOverlapDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._boostFactors = np.ones(self.columnCount, dtype=cltypes.float)
    self._queue = queue
    self._ctx = queue.context
    self._updatePeriod = 50

    # Struct dtypes are matched to the device's C layout; the generated C
    # declarations are prepended to the kernel source further down.
    synapse_struct = np.dtype([('permanence', cltypes.float),
                               ('bitIdx', cltypes.uint)])
    synapse_struct, synapse_struct_c_decl = cl.tools.match_dtype_to_c_struct(
        self._ctx.devices[0], "synapse_struct", synapse_struct)
    synapse_struct = cl.tools.get_or_register_dtype(
        'synapse_struct', synapse_struct)

    overlap_struct = np.dtype([('overlap', cltypes.uint),
                               ('boosted', cltypes.uint)])
    overlap_struct, overlap_struct_c_decl = cl.tools.match_dtype_to_c_struct(
        self._ctx.devices[0], "overlap_struct", overlap_struct)
    self.overlap_struct = cl.tools.get_or_register_dtype(
        'overlap_struct', overlap_struct)

    # One flat array: synapsesPerColumn consecutive entries per column.
    self.synapses = np.zeros(
        (columnCount * self.synapsesPerColumn),
        dtype=synapse_struct)  # x is permanence value, y is input bit idx

    if spVerbosity >= 1:
        print(
            '------------CL SpatialPooler Parameters ------------------')
        print("Synapses\t", self.synapses.size)
        print("Columns\t", self.columnCount)
        print("Input Width\t", self.inputWidth)
        print("Synapses Per Column\t", self.synapsesPerColumn)
        print("Synapse Connection Threshold\t", self.synPermConnected)

    self.synPermMin_ = 0.0
    self.synPermMax_ = 1.0
    # Initial permanences: normal around synPermConnected, clipped to [0, 1].
    self.synapses['permanence'] = np.clip(
        np.random.normal(synPermConnected,
                         (self.synPermMax_ - self.synPermMin_) / 10,
                         size=self.synapses.shape[0]).astype(np.float32),
        0, 1)
    self.synapses_no_bit = self.synapses['permanence']

    # each column connects to synapsesPerColumn distinct input bits
    input_synapses = np.arange(0, inputWidth)
    for column in range(self.columnCount):
        idx = column * self.synapsesPerColumn
        self.synapses['bitIdx'][idx:idx + self.synapsesPerColumn] = \
            np.random.choice(input_synapses, self.synapsesPerColumn, False)

    # array mapping each input bit to its synapse indexes: a fixed-width
    # row of max_count slots per input bit, padded with -1
    counts = np.unique(self.synapses['bitIdx'], return_counts=True)[1]
    max_count = np.max(counts)
    self.max_input_to_synapse = cltypes.int(max_count)
    # Sized by inputWidth (not by the number of distinct bits in use) so
    # that rows exist for input bits no column happened to pick.
    self.input_bitIdx = np.full(inputWidth * max_count, -1,
                                dtype=cltypes.int)
    for inputBitIdx in range(inputWidth):
        idx = inputBitIdx * max_count
        synapseIndexes = np.where(
            self.synapses['bitIdx'] == inputBitIdx)[0]
        self.input_bitIdx[idx:idx + synapseIndexes.size] = synapseIndexes

    src = ''.join(
        [synapse_struct_c_decl, overlap_struct_c_decl, kernel_src])
    self.prog = cl.Program(self._ctx, src).build()

    self._iterationNum = 0
    self._iterationLearnNum = 0
    self._inhibitionRadius = self.columnCount
    self.synapseCount = self.synapsesPerColumn * self.columnCount

    # initialise host buffers for commonly used things
    # we only copy stuff between host and device when we need to
    self.overlap = np.zeros(
        self.columnCount,
        dtype=cltypes.uint2)  # array of overlap and boosted overlap scores
    self.cl_boost_factors = cl.Buffer(self._ctx,
                                      mf.READ_WRITE | mf.COPY_HOST_PTR,
                                      hostbuf=self._boostFactors)
    self.cl_overlap = cl.Buffer(self._ctx, mf.READ_WRITE,
                                self.overlap.nbytes)
    encoding_temp = np.empty(
        inputWidth,
        dtype=cltypes.uchar)  # output is a np.uint8 == cltypes.uchar
    self.cl_encoding = cl.Buffer(self._ctx,
                                 mf.READ_ONLY,
                                 size=encoding_temp.nbytes)
    self.active_bits = np.zeros(inputActive, dtype=cltypes.long)
    self.inputActive = inputActive
    self.cl_active_bits = cl.Buffer(self._ctx,
                                    mf.READ_ONLY | mf.COPY_HOST_PTR,
                                    hostbuf=self.active_bits)
    self.cl_synapses = cl.Buffer(self._ctx,
                                 mf.READ_WRITE | mf.COPY_HOST_PTR,
                                 hostbuf=self.synapses)
    self.cl_input_bitIdx = cl.Buffer(self._ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=self.input_bitIdx)
    # synapses_no_bit is a strided field view into self.synapses; hand the
    # buffer constructor a contiguous copy of the same values.
    self.cl_synapses_no_bit = cl.Buffer(
        self._ctx,
        mf.READ_ONLY | mf.COPY_HOST_PTR,
        hostbuf=np.ascontiguousarray(self.synapses_no_bit))