def __init__(self, queue, numBuckets, steps=None, bits=2048, alpha=0.001,
             actValueAlpha=0.3, verbosity=False):
    """Build the OpenCL program and set up the host-side state.

    :param queue: OpenCL command queue; its context is used to build the
        kernel program and is kept on the instance.
    :param numBuckets: number of buckets; sizes ``bucket_activations`` and
        is stored as a ``cltypes.uint``.
    :param steps: sequence of steps, stored as given. Defaults to ``[1]``.
    :param bits: number of bits in the input; sizes ``bit_activations``.
    :param alpha: stored as a ``cltypes.float``.
    :param actValueAlpha: stored as a ``cltypes.float``.
    :param verbosity: stored as ``self._verbose``.
    """
    # A literal ``[1]`` default would be one list object shared by every
    # instance that relies on the default; build a fresh list per instance.
    if steps is None:
        steps = [1]

    self._queue = queue  # the opencl queue
    self._ctx = queue.context  # the opencl context
    self._prg = cl.Program(queue.context, kernel_src).build()

    self._learn_iteration = 0
    self._init_buffers = False
    self._verbose = verbosity

    self.bits = bits  # number of bits in the input
    self.bit_activations = np.zeros(bits, dtype=cltypes.uint)
    self.bucket_activations = np.zeros(numBuckets, dtype=cltypes.uint)
    self._numBuckets = cltypes.uint(numBuckets)

    self.steps = steps
    self.step_count = len(steps)

    self.alpha = cltypes.float(alpha)
    self.actValueAlpha = cltypes.float(actValueAlpha)
def __init__(self, queue, activationThreshold=14, cellsPerColumn=32,
             columnCount=2048, globalDecay=0.0, initialPerm=0.21,
             inputWidth=2048, maxAge=0, maxSegmentsPerCell=128,
             maxSynapsesPerSegment=32, minThreshold=11, newSynapseCount=20,
             outputType='normal', pamLength=3, permanenceDec=0.1,
             permanenceInc=0.1, seed=1960, temporalImp='cl', verbosity=0):
    """Store the configuration and allocate the OpenCL buffers.

    Count-like parameters are stored as ``cltypes.uint`` and the
    decay/permanence parameters as ``cltypes.float``. ``columnCount``,
    ``inputWidth``, ``outputType`` and ``verbosity`` are stored unchanged.

    :param queue: OpenCL command queue; its context is kept as ``_ctx``.
    :param seed: seed passed to ``np.random.seed``.
    :param temporalImp: must be ``'cl'``.
    :raises ValueError: if ``temporalImp`` is not ``'cl'``.
    """
    if temporalImp != 'cl':
        raise ValueError('This implementation only supports OpenCL')

    self._queue = queue
    self._ctx = queue.context

    # Unsigned-integer parameters.
    self.activationThreshold = cltypes.uint(activationThreshold)
    self.cellsPerColumn = cltypes.uint(cellsPerColumn)
    self.maxAge = cltypes.uint(maxAge)
    self.maxSegmentsPerCell = cltypes.uint(maxSegmentsPerCell)
    self.maxSynapsesPerSegment = cltypes.uint(maxSynapsesPerSegment)
    self.minThreshold = cltypes.uint(minThreshold)
    self.newSynapseCount = cltypes.uint(newSynapseCount)
    self.pamLength = cltypes.uint(pamLength)

    # Floating-point parameters.
    self.globalDecay = cltypes.float(globalDecay)
    self.initialPerm = cltypes.float(initialPerm)
    self.permanenceDec = cltypes.float(permanenceDec)
    self.permanenceInc = cltypes.float(permanenceInc)

    # Stored as passed in. The value was previously first stored as a
    # cltypes.uint and then immediately overwritten with the raw value;
    # only the raw value ever survived, so only that store is kept.
    self.columnCount = columnCount
    self.inputWidth = inputWidth
    self.outputType = outputType
    self.verbosity = verbosity

    # Seed once, right before the buffers are set up (the earlier duplicate
    # call had no random draw between it and this one).
    np.random.seed(seed)
    self._setup_cl_buffers()
def __call__(self, net, err, x_data, y_data):
    """Anneal the weights of every layer in ``net``.

    Each update perturbs the current weights of all layers into scratch
    buffers, runs a forward pass with the perturbed weights, and accepts
    the proposal if it lowers the error or, otherwise, with a probability
    that depends on the error increase and the current temperature.
    After ``self.max_updates`` updates the accepted weights are assigned
    back to the layers.

    :param net: network whose ``layers`` expose ``weights``, ``forward``
        and ``output``.
    :param err: error of the network with its current weights.
    :param x_data: input passed to the first layer's ``forward``.
    :param y_data: targets passed to ``self.loss.cpu``.
    :return: None; the layers' ``weights`` attributes are updated.
    """
    temperature = self.temperature

    # Scratch buffers receiving each proposal, and the accepted weights.
    candidates = [
        array.empty_like(layer.weights, self.queue) for layer in net.layers
    ]
    currents = [layer.weights for layer in net.layers]

    for _ in range(self.max_updates):
        # candidate = current + noise, one kernel launch per layer.
        events = [
            self.add_krnl(self.queue, (candidate.size, ), None,
                          current.data, candidate.data,
                          cltypes.float(np.random.randn()))
            for current, candidate in zip(currents, candidates)
        ]
        for event in events:
            event.wait()

        # Forward pass using the proposed weights.
        buf = x_data
        for candidate, layer in zip(candidates, net.layers):
            buf = layer.forward(buf, weights=candidate.data)
        output = net.layers[-1].output
        candidate_err = self.loss.cpu(y_data, output)

        # NaN never compares equal to anything (including itself), so an
        # ``== np.nan`` test can never fire; np.isnan is the correct check.
        # As before, a skipped proposal does not cool the temperature.
        if np.isnan(candidate_err):
            continue

        err_delta = err - candidate_err
        accept = candidate_err < err or np.exp(
            err_delta / temperature) - np.random.rand() > 0

        if accept:
            err = candidate_err
            # Swap instead of aliasing: if both names referred to the same
            # buffers, the next perturbation would overwrite the accepted
            # weights in place and a rejection could no longer fall back
            # to them. After the swap the old "current" buffers become the
            # scratch space for the next proposal.
            currents, candidates = candidates, currents

        temperature *= self.cooling_rate

    for layer, current in zip(net.layers, currents):
        layer.weights = current
from nncl import optimizers
import pyopencl as cl
import numpy as np
from pyopencl import cltypes, array

# The script was written against platform index 1. Keep preferring it, but
# fall back to the first platform on machines that expose only one instead
# of failing with an IndexError at import time.
_platforms = cl.get_platforms()
_platform = _platforms[1] if len(_platforms) > 1 else _platforms[0]
device = _platform.get_devices()[0]
ctx = cl.Context([device])
queue = cl.CommandQueue(ctx)


def read_only_arr(numbytes):
    """Allocator callback: return a READ_ONLY device buffer of *numbytes* bytes."""
    return cl.Buffer(ctx, cl.mem_flags.READ_ONLY, numbytes)


if __name__ == "__main__":
    np.set_printoptions(suppress=True)
    anneal = optimizers.Anneal(queue, None)

    # make an array of floats
    a = np.arange(0, 32, dtype=cltypes.float)
    a_gpu = array.to_device(queue, a, allocator=read_only_arr)

    # The kernel writes into the output, so it must not come from the
    # READ_ONLY allocator. ``empty_like`` reuses the source array's
    # allocator, so allocate the output independently with the default
    # (read-write) allocator instead.
    out_gpu = array.empty(queue, a.shape, a.dtype)

    print(a)
    anneal.add_krnl(queue, (a.size,), None, a_gpu.data, out_gpu.data,
                    cltypes.float(np.random.randn())).wait()
    print(out_gpu.get())
dtype=cltypes.float) colors_buf = cl.Buffer(ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=colors) color_width = 8.0 # Read source file with open(src_path, "r") as f: src = f.read() # Build program prg = cl.Program(ctx, src).build() # Execute OpenCL kernel on the device prg.render(queue, (width, height), None, image_buf, map_buf, colors_buf, *[cltypes.uint(x) for x in [width, height, depth, ssf]], cltypes.uint(len(colors)), cltypes.float(color_width)) # Copy rendered image to host cl.enqueue_copy(queue, image, image_buf) # Flush queue queue.flush() queue.finish() # Draw low-resolution image in terminal for row in np.mean(image[::32, ::16], axis=2): for x in row: print("@" if x > 1e-4 else ".", end="") print() # Save image to .png file
def compute(self, recordNum, pattern, classification, learn, infer):
    """
    Computes 1 step: records the pattern in the history, then either
    delegates to ``self.infer`` or runs the learning kernel.

    NOTE(review): the nesting below was reconstructed from a source whose
    whitespace had been collapsed -- confirm the indentation of the early
    ``return`` and of the trailing statements against the original file.

    :param recordNum: stored together with the pattern in
        ``self._patternNZHistory``
    :param pattern: indices of active columns in the TM layer
    :param classification: dict of bucketIdx and actualValue (read here
        through the keys ``'bucketIdx'`` and ``'actValue'``)
    :param learn: when truthy (and bucketIdx is not None) the learning
        loop runs
    :param infer: when truthy, the result of ``self.infer`` is returned
        and no learning happens
    :return: the result of ``self.infer`` when ``infer`` is truthy,
        otherwise ``multiStepPredictions`` (see the review note at the end)
    """
    if self.verbosity:
        print(" recordNum:", recordNum)
        print(" patternNZ (%d):" % len(pattern), pattern)
        print(" classificationIn:", classification)

    bucketIdx, actValue = classification['bucketIdx'], classification[
        'actValue']
    # Convert to the unsigned-int dtype so it can back an OpenCL buffer.
    pattern = np.array(pattern).astype(cltypes.uint)
    self._patternNZHistory.append((recordNum, pattern))

    retval = None
    if infer:
        retval = self.infer(pattern, classification)
        return retval

    # NOTE(review): with the early return above, ``infer`` is always falsy
    # from here on, so the ``if infer:`` branches below look unreachable.
    if learn and bucketIdx is not None:
        cl_activeBitIdx = cl.Buffer(self._ctx,
                                    mf.READ_ONLY | mf.COPY_HOST_PTR,
                                    hostbuf=pattern)
        # NOTE(review): learnRecordNum / learnPattern, ``error`` and
        # ``targetDist`` are not read anywhere in this method.
        for learnRecordNum, learnPattern in self._patternNZHistory:
            error = dict()
            targetDist = np.zeros(self._numBuckets + 1, dtype=cltypes.float)
            targetDist[bucketIdx] = 1.0
            # NOTE(review): dict.iteritems() exists only on Python 2.
            for step, table in self._weights.iteritems():
                # print("old table")
                # self._show_table(table)
                """ int* activeBitIdx float2 *table, // x=histogram, y=moving average float const alpha, // moving average alpha float const actualValue, // actual input value int const bucketIdx, // bucket that actualValue falls into int const bucketCount, bool const learn, bool const infer, __global float *predictions """
                # Device-side copies of the table: one the kernel reads,
                # one it writes the updated values into.
                new_table = table.copy()
                cl_new_table = cl.Buffer(self._ctx,
                                         mf.WRITE_ONLY | mf.COPY_HOST_PTR,
                                         hostbuf=new_table)
                cl_table = cl.Buffer(self._ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=table)
                # NOTE(review): this re-creates the identical buffer that
                # was just created on the previous statement.
                if learn:
                    cl_table = cl.Buffer(self._ctx,
                                         mf.READ_ONLY | mf.COPY_HOST_PTR,
                                         hostbuf=table)
                if infer:
                    predictions = np.zeros(self._numBuckets,
                                           dtype=cl.array.vec.float2)
                    cl_predictions = cl.Buffer(self._ctx, mf.READ_WRITE,
                                               predictions.nbytes)
                else:
                    # 1-byte placeholder so the kernel argument is still a
                    # valid buffer when no predictions are requested.
                    cl_predictions = cl.Buffer(self._ctx, mf.WRITE_ONLY, 1)
                # One work-item per entry of ``pattern``.
                self._prg.infer_compute(self._queue, (pattern.shape[0], ), None,
                                        cl_activeBitIdx, cl_table, cl_new_table,
                                        cltypes.float(self.actValueAlpha),
                                        cltypes.float(actValue),
                                        cltypes.uint(bucketIdx),
                                        cltypes.uint(self._numBuckets),
                                        cltypes.char(learn),
                                        cltypes.char(infer), cl_predictions)
                if learn:
                    # NOTE(review): the copy destination is self.steps[step],
                    # not ``table`` / ``new_table`` -- confirm this is the
                    # intended host array.
                    cl.enqueue_copy(self._queue, self.steps[step],
                                    cl_new_table).wait()
                if infer:
                    cl.enqueue_copy(self._queue, predictions,
                                    cl_predictions).wait()
                    print("Activations", self.bucket_activations)
                    multiStepPredictions[step] = predictions['y'] / len(
                        pattern)  # the probability for each bucket
                    print("Actual Values", multiStepPredictions['actualValues'])
                    print("Probability", multiStepPredictions[step])
        self.bucket_activations[bucketIdx] += 1
    # NOTE(review): ``multiStepPredictions`` is never assigned in this
    # method; unless it is defined at module level this raises NameError.
    return multiStepPredictions
def __init__(self, learing_rate=0.0001, reg=0.003):
    """Store the hyper-parameters as OpenCL floats.

    :param learing_rate: learning rate (parameter name kept as spelled for
        backward compatibility); exposed as both ``learning_rate`` and
        ``lr``.
    :param reg: exposed as ``reg``.
    """
    rate = cltypes.float(learing_rate)
    regularisation = cltypes.float(reg)

    self.learning_rate = rate
    # ``lr`` is an alias for the very same value object.
    self.lr = self.learning_rate
    self.reg = regularisation
def __init__(self, queue, columnCount=2048, globalInhibition=1,
             inputWidth=500, inputActive=33, boostStrength=0.0,
             numActiveColumnsPerInhArea=40, potentialPct=.5,
             stimulusThreshold=0, seed=1956, dutyCyclePeriod=1000,
             spVerbosity=0, spatialImp='cl', synPermActiveInc=0.05,
             synPermConnected=0.10, synPermInactiveDec=0.008):
    """Initialise the host state, build the kernels and allocate buffers.

    :param queue: OpenCL command queue; its context is used for the
        program and all device buffers.
    :param columnCount: number of columns; sizes the column, duty-cycle,
        boost and overlap arrays.
    :param globalInhibition: must be ``1``.
    :param inputWidth: number of input bits.
    :param inputActive: length of the ``active_bits`` host array.
    :param boostStrength: stored unchanged.
    :param numActiveColumnsPerInhArea: stored as a ``cltypes.uint``.
    :param potentialPct: each column gets
        ``uint(inputWidth * potentialPct)`` synapses.
    :param stimulusThreshold: stored as a ``cltypes.uint``.
    :param seed: seed passed to ``np.random.seed``.
    :param dutyCyclePeriod: stored as a ``cltypes.uint``.
    :param spVerbosity: when ``>= 1`` the derived sizes are printed.
    :param spatialImp: must be ``'cl'``.
    :param synPermActiveInc: stored as a ``cltypes.float``.
    :param synPermConnected: stored as a ``cltypes.float``; also the mean
        of the initial permanence distribution.
    :param synPermInactiveDec: stored as a ``cltypes.float``.
    :raises ValueError: if ``spatialImp`` is not ``'cl'`` or
        ``globalInhibition`` is not ``1``.
    """
    if spatialImp != 'cl':
        raise ValueError(
            'This implementation only supports OpenCL Temporal Memory')
    if globalInhibition != 1:
        raise ValueError(
            'This implementation does not support local inhibition')

    self.columnCount = cltypes.uint(columnCount)
    self.globalInhibition = globalInhibition
    self.inputWidth = inputWidth
    self.boostStrength = boostStrength
    self.numActiveColumnPerInhArea = cltypes.uint(
        numActiveColumnsPerInhArea)
    self.potentialPct = cltypes.float(potentialPct)
    np.random.seed(seed)
    self.verbosity = spVerbosity
    self.synPermActiveInc = cltypes.float(synPermActiveInc)
    self.synPermConnected = cltypes.float(synPermConnected)
    self.synPermInactiveDec = cltypes.float(synPermInactiveDec)

    # store the TM as an array of int, either on or off
    self.columns = np.zeros(columnCount, dtype=cltypes.uint)
    self.synapsesPerColumn = cltypes.uint(inputWidth * potentialPct)
    self._stimulusThreshold = cltypes.uint(stimulusThreshold)
    self._dutyCyclePeriod = cltypes.uint(dutyCyclePeriod)
    self._activeDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._overlapDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._minOverlapDutyCycles = np.zeros(self.columnCount, cltypes.uint)
    self._boostFactors = np.ones(self.columnCount, dtype=cltypes.float)
    self._queue = queue
    self._ctx = queue.context
    self._updatePeriod = 50

    # Struct dtypes are matched to the device's layout; the generated C
    # declarations are prepended to the kernel source further down.
    synapse_struct = np.dtype([('permanence', cltypes.float),
                               ('bitIdx', cltypes.uint)])
    synapse_struct, synapse_struct_c_decl = cl.tools.match_dtype_to_c_struct(
        self._ctx.devices[0], "synapse_struct", synapse_struct)
    synapse_struct = cl.tools.get_or_register_dtype(
        'synapse_struct', synapse_struct)

    overlap_struct = np.dtype([('overlap', cltypes.uint),
                               ('boosted', cltypes.uint)])
    overlap_struct, overlap_struct_c_decl = cl.tools.match_dtype_to_c_struct(
        self._ctx.devices[0], "overlap_struct", overlap_struct)
    self.overlap_struct = cl.tools.get_or_register_dtype(
        'overlap_struct', overlap_struct)

    self.synapses = np.zeros(
        (columnCount * self.synapsesPerColumn),
        dtype=synapse_struct)  # x is permanence value, y is input bit idx

    if spVerbosity >= 1:
        print(
            '------------CL SpatialPooler Parameters ------------------')
        print("Synapses\t", self.synapses.size)
        print("Columns\t", self.columnCount)
        print("Input Width\t", self.inputWidth)
        print("Synapses Per Column\t", self.synapsesPerColumn)
        print("Synapse Connection Threshold\t", self.synPermConnected)

    self.synPermMin_ = 0.0
    self.synPermMax_ = 1.0

    # Initial permanences: normal around the connection threshold, clipped
    # to [0, 1].
    self.synapses['permanence'] = np.clip(
        np.random.normal(synPermConnected,
                         (self.synPermMax_ - self.synPermMin_) / 10,
                         size=self.synapses.shape[0]).astype(np.float32),
        0, 1)
    # NOTE(review): this is a field view of the structured array (strided,
    # not a contiguous copy); confirm cl.Buffer accepts it as hostbuf below.
    self.synapses_no_bit = self.synapses['permanence']

    # each column connects to exactly synapsesPerColumn distinct inputs
    input_synapses = np.arange(0, inputWidth)
    for column in range(self.columnCount):
        idx = column * self.synapsesPerColumn
        self.synapses['bitIdx'][idx:idx +
                                self.synapsesPerColumn] = np.random.choice(
                                    input_synapses, self.synapsesPerColumn,
                                    False)

    _, counts = np.unique(self.synapses['bitIdx'], return_counts=True)

    # array mapping each input bit to its synapse indexes, padded with -1
    max_count = np.max(counts)
    self.max_input_to_synapse = cltypes.int(max_count)
    # One row of max_count slots per input bit. The table is indexed by
    # ``inputBitIdx * max_count`` for every bit in range(inputWidth), so it
    # must have inputWidth rows; sizing it by the number of *distinct*
    # connected bits made it too short whenever some bit had no synapse.
    self.input_bitIdx = np.full(inputWidth * max_count, -1,
                                dtype=cltypes.int)
    for inputBitIdx in range(inputWidth):
        idx = inputBitIdx * max_count
        synapseIndexes = np.where(
            self.synapses['bitIdx'] == inputBitIdx)[0]
        self.input_bitIdx[idx:idx + synapseIndexes.size] = synapseIndexes

    src = ''.join(
        [synapse_struct_c_decl, overlap_struct_c_decl, kernel_src])
    self.prog = cl.Program(self._ctx, src).build()

    self._iterationNum = 0
    self._iterationLearnNum = 0
    self._inhibitionRadius = self.columnCount
    self.synapseCount = self.synapsesPerColumn * self.columnCount

    # initialise host buffers for commonly used things
    # we only copy stuff between host and device when we need to
    self.overlap = np.zeros(
        self.columnCount,
        dtype=cltypes.uint2)  # array of overlap and boosted overlap scores
    self.cl_boost_factors = cl.Buffer(self._ctx,
                                      mf.READ_WRITE | mf.COPY_HOST_PTR,
                                      hostbuf=self._boostFactors)
    self.cl_overlap = cl.Buffer(self._ctx, mf.READ_WRITE,
                                self.overlap.nbytes)

    # Only used to compute the byte size of the encoding buffer.
    encoding_temp = np.empty(
        inputWidth,
        dtype=cltypes.uchar)  # output is a np.uint8 == cltypes.uchar
    self.cl_encoding = cl.Buffer(self._ctx, mf.READ_ONLY,
                                 size=encoding_temp.nbytes)

    self.active_bits = np.zeros(inputActive, dtype=cltypes.long)
    self.inputActive = inputActive
    self.cl_active_bits = cl.Buffer(self._ctx,
                                    mf.READ_ONLY | mf.COPY_HOST_PTR,
                                    hostbuf=self.active_bits)
    self.cl_synapses = cl.Buffer(self._ctx,
                                 mf.READ_WRITE | mf.COPY_HOST_PTR,
                                 hostbuf=self.synapses)
    self.cl_input_bitIdx = cl.Buffer(self._ctx,
                                     mf.READ_ONLY | mf.COPY_HOST_PTR,
                                     hostbuf=self.input_bitIdx)
    self.cl_synapses_no_bit = cl.Buffer(self._ctx,
                                        mf.READ_ONLY | mf.COPY_HOST_PTR,
                                        hostbuf=self.synapses_no_bit)