class SpatialPooler:
    """
    This class handles the mini-column structures and the feed forward
    proximal inputs to each cortical mini-column.

    See: Cui, Ahmad & Hawkins, 2017, "The HTM Spatial Pooler - A Neocortical
    Algorithm for Online Sparse Distributed Coding"

    Topology: This implements local inhibition with topology by creating many
    small groups of mini-columns which are distributed across the input
    space.  All of the mini-columns in a group are located at the same
    location in the input space, and they inhibit each other equally.  Each
    group of mini-columns is self contained; groups of mini-columns do not
    inhibit each other or interact.  Instead of creating a large spatial
    pooler with topology, this creates many small spatial poolers with
    topology between the spatial poolers.
    """
    def __init__(self,
            input_sdr,
            mini_columns,       # Integer
            sparsity,
            potential_pool,
            permanence_inc,
            permanence_dec,
            permanence_thresh,
            segments       = 1,
            macro_columns  = (1,),
            init_dist      = (0, 0),
            boosting_alpha = None,
            active_thresh  = 0,
            radii          = tuple()):
        """
        Argument mini_columns is an Integer, the number of mini-columns in
                 each macro-column.

        Argument macro_columns is a tuple of integers, the dimensions of the
                 macro-column array.  These are topological dimensions.
                 Macro-columns are distributed across the input space in a
                 uniform grid.

        Optional Argument radii defines the input topology.  Trailing extra
                 input dimensions are treated as non-topological dimensions.

        Argument segments is an Integer, the number of proximal segments for
                 each mini-column.

        Argument sparsity is the fraction of mini-columns which activate in
                 each macro-column.

        Argument potential_pool is the number of potential synapses on each
                 proximal segment.

        Optional Argument boosting_alpha is the small constant used by the
                 moving exponential average which tracks each mini-column's
                 activation frequency.  Default value is None, which disables
                 boosting altogether.

        Argument permanence_inc is the amount by which the permanences of
                 active synapses are incremented during learning.

        Argument permanence_dec is the amount by which the permanences of
                 inactive synapses are decremented during learning.

        Argument permanence_thresh is the permanence value at or above which
                 a synapse is considered connected.

        Argument init_dist is (mean, std) of the initial permanence values,
                 which are drawn from a gaussian random distribution.

        Argument active_thresh is the minimum excitement which a mini-column
                 needs in order to activate.
        """
        assert(isinstance(input_sdr, SDR))
        assert(potential_pool > 1)  # Number of synapses, not percent.
        self.mini_columns   = int(round(mini_columns))
        self.macro_columns  = tuple(int(round(dim)) for dim in macro_columns)
        self.radii          = radii
        self.segments       = int(round(segments))
        self.columns        = SDR(self.macro_columns + (self.mini_columns,),
            activation_frequency_alpha = boosting_alpha,
            average_overlap_alpha      = boosting_alpha,)
        self.sparsity       = sparsity
        self.active_thresh  = active_thresh
        self.potential_pool = potential_pool
        self.age            = 0

        segment_shape = self.macro_columns + (self.mini_columns, self.segments)
        self.synapses = SynapseManager(
            input_sdr              = input_sdr,
            output_sdr             = SDR(segment_shape),
            radii                  = radii,
            init_dist              = init_dist,
            permanence_inc         = permanence_inc,
            permanence_dec         = permanence_dec,
            permanence_thresh      = permanence_thresh,
            initial_potential_pool = self.potential_pool,)

        if init_dist == (0, 0):
            # Nupic's SP init method.
            # TODO: Make this a permanent part of the synapses class?
            #       Change the init_dist argument to accept a string 'sp'?
            for idx in range(self.synapses.output_sdr.size):
                pp          = self.synapses.postsynaptic_permanences[idx].shape[0]
                connected   = np.random.random(pp) > .5
                permanences = np.random.random(pp)
                permanences[connected] *= 1 - self.synapses.permanence_thresh
                permanences[connected] += self.synapses.permanence_thresh
                permanences[np.logical_not(connected)] *= self.synapses.permanence_thresh
                self.synapses.postsynaptic_permanences[idx] = np.array(permanences, dtype=np.float32)
            self.synapses.rebuild_indexes()

        # Break ties randomly, in a constant unchanging manner.
        self.tie_breakers = np.random.uniform(0, .5, size=self.synapses.output_sdr.dimensions)

        self.boosting_alpha = boosting_alpha
        if boosting_alpha is not None:
            # Make a dedicated SDR to track segment activation frequencies
            # for boosting.
            self.boosting = SDR(self.synapses.output_sdr,
                activation_frequency_alpha = boosting_alpha,
                average_overlap_alpha      = boosting_alpha,)
            # Initialize to the target activation frequency/sparsity.
            self.boosting.activation_frequency.fill(self.sparsity / self.segments)

        self.reset()

    def reset(self):
        self.columns.zero()
        self.prev_updates = np.full(self.synapses.output_sdr.size, None)

    def compute(self, input_sdr=None, input_learning_sdr=None, learn=True):
        """
        Computes the feed forward excitement of every mini-column and
        activates the most excited mini-columns in each macro-column.
        Returns the active columns SDR.
        """
        excitement, potential_excitement = self.synapses.compute(input_sdr=input_sdr)
        excitement = excitement + self.tie_breakers

        # Logarithmic Boosting Function.
        if self.boosting_alpha is not None:
            target_sparsity = self.sparsity / self.segments
            boost = np.log2(self.boosting.activation_frequency) / np.log2(target_sparsity)
            boost = np.nan_to_num(boost)
            excitement *= boost

        # Divide excitement by the number of connected synapses.
        n_con_syns = self.synapses.postsynaptic_connected_count
        n_con_syns = n_con_syns.reshape(self.synapses.output_sdr.dimensions)
        percent_overlap = excitement / n_con_syns

        # Reduce the segment dimension to each mini-column's single most
        # excited segment.
        column_excitement = np.max(percent_overlap, axis=-1)

        # Stable SP and Grid Cells modify the excitement here.
        column_excitement = self._compute_hook(column_excitement)

        # Activate mini-columns.  First determine how many mini-columns to
        # activate in each macro-column.
        n_activate = max(1, int(round(self.mini_columns * self.sparsity)))

        # Activate the most excited mini-columns in each macro-column.
        k = self.mini_columns - n_activate
        mini_index = np.argpartition(column_excitement, k, axis=-1)[..., k:]

        # Convert activations from mini-column indices to macro-column
        # indices.
        macro_index    = tuple(np.indices(mini_index.shape))[:-1]
        winner_columns = tuple(x.reshape(-1) for x in macro_index + (mini_index,))

        # Filter out columns with sub-threshold excitement.
        winner_excitement = np.max(excitement[winner_columns], axis=-1)
        winner_columns    = tuple(np.compress(winner_excitement >= self.active_thresh,
                                              winner_columns, axis=1))

        # Output the results into the columns SDR.
        self.columns.index = winner_columns

        if learn:
            seg_idx = np.argmax(excitement[winner_columns], axis=-1)
            learning_segments = winner_columns + (seg_idx,)
            self.prev_updates = self.synapses.learn(
                input_sdr    = input_learning_sdr,
                output_sdr   = learning_segments,
                prev_updates = self.prev_updates,)

            # Update the exponential moving average of each segment's
            # activation frequency.
            if self.boosting_alpha is not None:
                self.boosting.assign(learning_segments)

            self.age += 1

        return self.columns

    def _compute_hook(self, x):
        """Subclasses override this method."""
        return x

    def statistics(self, _class_name='Spatial Pooler'):
        stats  = _class_name + ' '
        stats += self.synapses.statistics()
        stats += 'Columns ' + self.columns.statistics()

        if self.boosting_alpha is not None:
            if self.segments > 1:
                stats += 'Segments ' + self.boosting.statistics()
            af         = self.boosting.activation_frequency
            target     = self.sparsity / self.segments
            boost_min  = np.log2(np.min(af))  / np.log2(target)
            boost_mean = np.log2(np.mean(af)) / np.log2(target)
            boost_max  = np.log2(np.max(af))  / np.log2(target)
            stats += '\tLogarithmic Boosting Multiplier min/mean/max  {:-.04g}% / {:-.04g}% / {:-.04g}%\n'.format(
                boost_min  * 100,
                boost_mean * 100,
                boost_max  * 100,)

        return stats
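
# The following is a minimal, self-contained sketch of the logarithmic
# boosting function used by the SpatialPooler above, included for
# illustration only.  It works on plain numpy arrays instead of this
# library's SDR class, reuses this module's numpy import, and the function
# name is hypothetical, not part of the public API.
def _demo_log_boost(activation_frequency, target_frequency):
    """
    boost_factor = log(activation_frequency) / log(target_frequency)

    Columns at the target frequency get a boost factor of 1, starved columns
    get a factor greater than 1, and a column which is always active gets 0.
    """
    boost = np.log2(activation_frequency) / np.log2(target_frequency)
    return np.nan_to_num(boost)

# Example, with a 2% target sparsity: a starved column (0.1% active) is
# boosted and an over-active column (20% active) is suppressed.
# >>> _demo_log_boost(np.array([.001, .02, .2]), .02)
# array([1.7657..., 1.    , 0.4114...])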
class SDR_Classifier:
    """Maximum Likelihood classifier for SDRs."""
    def __init__(self, parameters, input_sdr, output_shape, output_type):
        """
        Argument parameters must be an instance of SDRC_Parameters.
        Argument output_type must be one of: 'index', 'bool', 'pdf'
        """
        self.args         = parameters
        # EEK! This copies the argument's current value instead of saving a
        # reference to the argument.
        self.input_sdr    = SDR(input_sdr)
        self.output_shape = tuple(output_shape)
        self.output_type  = output_type
        assert(self.output_type in ('index', 'bool', 'pdf'))
        # Don't initialize to zero; touch every input+output pair once or
        # twice.
        self.stats = np.random.uniform(
            0, 5 * self.args.alpha,
            size=(self.input_sdr.size,) + self.output_shape)
        self.age = 0

    def train(self, input_sdr, out):
        """
        Argument input_sdr is a tuple of index arrays, as output from an SP's
                 or TP's compute method:
                 (ndarray of input space dim 0 indexes,
                  ndarray of input space dim 1 indexes,
                  ...)

        Argument out is the training target in the output space.  Its format
                 depends on output_type; for 'index' it is an iterable of
                 output indexes.
        """
        self.input_sdr.assign(input_sdr)
        inp   = self.input_sdr.flat_index
        alpha = self.args.alpha
        self.stats[inp] *= (1 - alpha)    # Decay.
        # Update.
        if self.output_type == 'index':
            for out_idx in out:
                self.stats[inp, out_idx] += alpha
        if self.output_type == 'bool':
            self.stats[inp, out] += alpha
        if self.output_type == 'pdf':
            updates = (out - self.stats[inp]) * alpha
            self.stats[inp] += updates
        self.age += 1

    def predict(self, input_sdr=None):
        """
        Argument input_sdr is an ndarray of indexes into the input space.
        Returns the probability of each category in the output space.
        """
        self.input_sdr.assign(input_sdr)
        pdf = self.stats[self.input_sdr.flat_index]
        if True:
            # Combine multiple probabilities into a single pdf.  Use the
            # product, not the summation, to combine the probabilities of
            # independent events.  The problem with this is that if a few
            # unexpected bits turn on, it multiplies the result by zero, and
            # the test dataset is going to have unexpected things in it.
            return np.prod(pdf, axis=0, keepdims=False)
        else:
            # Use summation B/C it works well.
            return np.sum(pdf, axis=0, keepdims=False)

    def __str__(self):
        s  = "SDR Classifier alpha %g\n" % self.args.alpha
        s += "\tInput -> Output shapes are %s -> %s" % (
            self.input_sdr.dimensions, self.output_shape)
        return s
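
# The following is a minimal sketch of the update rule which
# SDR_Classifier.train() implements, shown with plain numpy arrays for
# clarity.  The names are hypothetical; 'stats' plays the role of self.stats
# and holds, for each input bit, an exponential moving average over the
# labels observed while that bit was active.
def _demo_sdrc_update(stats, active_bits, label, alpha):
    # Decay all label tallies of the active input bits, then bump the tally
    # of the observed label, keeping each row an exponential moving average.
    stats[active_bits] *= (1 - alpha)
    stats[active_bits, label] += alpha

def _demo_sdrc_predict(stats, active_bits):
    # Sum the label averages of the active bits; this corresponds to the
    # summation branch of SDR_Classifier.predict().
    return np.sum(stats[active_bits], axis=0)

# Example usage, assuming 100 input bits and 10 categories:
# >>> stats = np.random.uniform(0, .005, size=(100, 10))
# >>> _demo_sdrc_update(stats, np.array([3, 17, 42]), label=7, alpha=.001)
# >>> _demo_sdrc_predict(stats, np.array([3, 17, 42])).argmax()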
class SpatialPooler:
    """
    This class handles the mini-column structures and the feed forward
    proximal inputs to each cortical mini-column.

    This implementation is based on, but differs from, the one described by
    Numenta's Spatial Pooler white paper (Cui, Ahmad & Hawkins, 2017, "The
    HTM Spatial Pooler - A Neocortical Algorithm for Online Sparse
    Distributed Coding") in two main ways: the boosting function and the
    local inhibition mechanism.

    Logarithmic Boosting Function:
    This uses a logarithmic boosting function.  Its input is the activation
    frequency, which is in the range [0, 1], and its output is a boosting
    factor which multiplies each column's excitement.  Its equation is:
        boost-factor = log( activation-frequency ) / log( target-frequency )
    Some things to note:
        1) The boost factor asymptotically approaches infinity as the
           activation frequency approaches zero.
        2) The boost factor equals zero when the activation frequency is one.
        3) The boost factor for columns which are at the target activation
           frequency is one.
        4) This mechanism has a single parameter: boosting_alpha, which
           controls the exponential moving average which tracks the
           activation frequency.

    Fast Local Inhibition:
    This activates the most excited columns globally, after normalizing all
    columns by their local area mean and standard deviation.  The local area
    is a gaussian window, and its standard deviation is proportional to the
    deviation which is used to make the receptive fields of each column.
    Columns inhibit each other in proportion to the number of inputs which
    they share.  In pseudo code:
        1.  mean_normalized = excitement - gaussian_blur( excitement, radius )
        2.  standard_deviation = sqrt( gaussian_blur( mean_normalized ^ 2, radius ))
        3.  normalized = mean_normalized / standard_deviation
        4.  activate = top_k( normalized, sparsity * number_of_columns )
    A runnable sketch of these four steps appears at the end of this file.
    """
    stability_st_period = 1000
    stability_lt_period = 10      # Units: self.stability_st_period

    def __init__(self, parameters, input_sdr, column_sdr,
            radii                   = None,
            stability_sample_size   = 0,
            multisegment_experiment = None,
            init_dist               = None,):
        """
        Argument parameters is an instance of SpatialPoolerParameters.

        Argument input_sdr is the SDR which feeds into this spatial pooler.

        Argument column_sdr is the SDR which this spatial pooler writes its
                 active columns into.

        Argument radii is the standard deviation of the gaussian window which
                 defines the local neighborhood of a column.  The radii
                 determine which inputs are likely to be in a column's
                 potential pool.  If radii is None then topology is disabled.
                 See SynapseManager.normally_distributed_connections for
                 details about topology.

        Argument stability_sample_size is how many samples to take during
                 each sample period; set to 0 to disable stability
                 monitoring.  The default is off.
        """
        assert(isinstance(parameters, SpatialPoolerParameters))
        assert(isinstance(input_sdr, SDR))
        assert(isinstance(column_sdr, SDR))
        self.args     = args = parameters
        self.inputs   = input_sdr
        self.columns  = column_sdr
        self.topology = radii is not None
        self.age      = 0
        self.stability_schedule    = [0] if stability_sample_size > 0 else [-1]
        self.stability_sample_size = stability_sample_size
        self.stability_samples     = []

        self.multisegment = multisegment_experiment is not None
        if self.multisegment:
            # EXPERIMENTAL: Multi-segment proximal dendrites.
            self.segments_per_cell = int(round(multisegment_experiment))
            self.proximal = SynapseManager(
                self.inputs,
                SDR(self.columns.dimensions + (self.segments_per_cell,),
                    activation_frequency_alpha=args.boosting_alpha),  # Used for boosting!
                permanence_inc    = args.permanence_inc,
                permanence_dec    = args.permanence_dec,
                permanence_thresh = args.permanence_thresh,)
            # Initialize to the target activation frequency/sparsity.
            self.proximal.outputs.activation_frequency.fill(args.sparsity / self.segments_per_cell)
        else:
            self.proximal = SynapseManager(
                self.inputs,
                self.columns,
                permanence_inc    = args.permanence_inc,
                permanence_dec    = args.permanence_dec,
                permanence_thresh = args.permanence_thresh,)

        if self.topology:
            r = self.proximal.normally_distributed_connections(args.potential_pool, radii, init_dist=init_dist)
            self.inhibition_radii = r
        else:
            self.proximal.uniformly_distributed_connections(args.potential_pool, init_dist=init_dist)

        if args.boosting_alpha is not None:
            # Make a dedicated SDR to track column activation frequencies for
            # boosting.
            self.boosting = SDR(self.columns,
                activation_frequency_alpha = args.boosting_alpha,
                # Note: average overlap is useful to know, but is not part of
                # the boosting algorithm.
                average_overlap_alpha = args.boosting_alpha,)
            # Initialize to the target activation frequency/sparsity.
            self.boosting.activation_frequency.fill(args.sparsity)

    def compute(self, input_sdr=None):
        """
        Computes the active columns for the current inputs and returns the
        column SDR.
        """
        args = self.args
        if self.multisegment:
            # EXPERIMENTAL: Multi-segment proximal dendrites.
            excitement = self.proximal.compute(input_sdr=input_sdr)

            # Logarithmic Boosting Function.
            if args.boosting_alpha is not None:
                target_sparsity = args.sparsity / self.segments_per_cell
                boost = np.log2(self.proximal.outputs.activation_frequency) / np.log2(target_sparsity)
                boost = np.nan_to_num(boost).reshape(self.proximal.outputs.dimensions)
                excitement = boost * excitement

            # Break ties randomly.
            excitement = excitement + np.random.uniform(0, .5, size=self.proximal.outputs.dimensions)
            self.segment_excitement = excitement

            # Replace the segment dimension with each column's most excited
            # segment.
            excitement = np.max(excitement, axis=-1)
            raw_excitement = excitement.reshape(-1)
        else:
            raw_excitement = self.proximal.compute(input_sdr=input_sdr).reshape(-1)

            # Logarithmic Boosting Function.
            if args.boosting_alpha is not None:
                boost = np.log2(self.boosting.activation_frequency) / np.log2(args.sparsity)
                boost = np.nan_to_num(boost)
                raw_excitement = boost * raw_excitement

        # Fast Local Inhibition.
        if self.topology:
            inhibition_radii = self.inhibition_radii
            raw_excitement   = raw_excitement.reshape(self.columns.dimensions)
            avg_local_excitement = scipy.ndimage.gaussian_filter(
                raw_excitement, inhibition_radii,
                mode='reflect', truncate=3.0)   # Truncate the window for speed.
            local_excitement = raw_excitement - avg_local_excitement
            stddev = np.sqrt(scipy.ndimage.gaussian_filter(
                local_excitement**2, inhibition_radii,
                mode='reflect', truncate=3.0))
            raw_excitement = np.nan_to_num(local_excitement / stddev)
            raw_excitement = raw_excitement.reshape(-1)

        # EXPERIMENTAL
        self.raw_excitement = raw_excitement

        # Activate the most excited columns.
        #
        # Note: excitements are not normally distributed; their local
        # neighborhoods use gaussian windows, which are a different thing.
        # Don't try to use a threshold, it won't work.  Especially not:
        # threshold = scipy.stats.norm.ppf(1 - sparsity).
        k = self.columns.size * args.sparsity
        k = max(1, int(round(k)))
        self.columns.flat_index = np.argpartition(-raw_excitement, k-1)[:k]
        return self.columns

    def learn(self, input_sdr=None, column_sdr=None):
        """
        Make the spatial pooler learn about its current inputs and active
        columns.
""" if self.multisegment: # Learn about regular activations self.columns.assign(column_sdr) segment_excitement = self.segment_excitement[self.columns.index] seg_idx = np.argmax(segment_excitement, axis=-1) # seg_idx = np.random.choice(self.segments_per_cell, size=len(self.columns)) self.proximal.learn_outputs(input_sdr=input_sdr, output_sdr=self.columns.index + (seg_idx,)) else: # Update proximal synapses and their permanences. Also assigns into our column SDR. self.proximal.learn_outputs(input_sdr=input_sdr, output_sdr=column_sdr) # Update the exponential moving average of each columns activation frequency. self.boosting.assign(self.columns) # Book keeping. self.stability(self.inputs, self.columns.index) self.age += 1 def stabilize(self, prior_columns, percent): """ This activates prior columns to force active in order to maintain the given percent of column overlap between time steps. Always call this between compute and learn! """ # num_active = (len(self.columns) + len(prior_columns)) / 2 num_active = len(self.columns) overlap = self.columns.overlap(prior_columns) stabile_columns = int(round(num_active * overlap)) target_columns = int(round(num_active * percent)) add_columns = target_columns - stabile_columns if add_columns <= 0: return eligable_columns = np.setdiff1d(prior_columns.flat_index, self.columns.flat_index) eligable_excite = self.raw_excitment[eligable_columns] selected_col_nums = np.argpartition(-eligable_excite, add_columns-1)[:add_columns] selected_columns = eligable_columns[selected_col_nums] selected_index = np.unravel_index(selected_columns, self.columns.dimensions) # Learn. Note: selected columns will learn twice. The previously # active segments learn now, the current most excited segments in the # method SP.learn(). # Or learn not at all if theres a bug in my code... # if self.multisegment: # if hasattr(self, 'prior_segment_excitement'): # segment_excitement = self.prior_segment_excitement[selected_index] # seg_idx = np.argmax(segment_excitement, axis=-1) # self.proximal.learn_outputs(input_sdr=input_sdr, # output_sdr=selected_index + (seg_idx,)) # self.prev_segment_excitement = self.segment_excitement # else: # 1/0 self.columns.flat_index = np.concatenate([self.columns.flat_index, selected_columns]) def plot_boost_functions(self, beta = 15): # Generate sample points dc = np.linspace(0, 1, 10000) from matplotlib import pyplot as plt fig = plt.figure(1) ax = plt.subplot(111) log_boost = lambda f: np.log(f) / np.log(self.args.sparsity) exp_boost = lambda f: np.exp(beta * (self.args.sparsity - f)) logs = [log_boost(f) for f in dc] exps = [exp_boost(f) for f in dc] plt.plot(dc, logs, 'r', dc, exps, 'b') plt.title("Boosting Function Comparison \nLogarithmic in Red, Exponential in Blue (beta = %g)"%beta) ax.set_xlabel("Activation Frequency") ax.set_ylabel("Boost Factor") plt.show() def stability(self, input_sdr, output_sdr, diag=True): """ Measures the short and long term stability from compute's input stream. Do not call this directly! Instead set it up before and via SpatialPooler.__init__() and this will print the results to STDOUT. Argument input_sdr, output_sdr ... Attribute stability_sample_size is how many samples to take during each sample period. Attribute stability_samples is list of samples, where each sample is a list of pairs of (input_sdr, output_sdr). The index is how many (short term) sample periods ago the sample was taken. 
        Attribute stability_schedule is a list of ages to take input/output
                 samples at, in descending order so that the soonest sample
                 age is at the end of the list.  Append -1 to the schedule to
                 disable stability monitoring.  The final age in the schedule
                 is special: on this age it calculates the stability and
                 makes a new schedule for the next period.

        Class Attribute stability_st_period
                 st == short term, lt == long term
                 The stability period is how many compute cycles this SP will
                 wait before recomputing the stability samples and comparing
                 with the original results.  This calculates two measures of
                 stability: short and long term.  The long term period is
                 written in terms of the short term period.

        Class Attribute stability_lt_period
                 Units: self.stability_st_period

        Attributes st_stability and lt_stability are the most recent
                 measurements of short and long term stability, respectively.
                 These are initialized to None.
        """
        if self.stability_schedule[-1] != self.age:
            return
        else:
            self.stability_schedule.pop()

        if self.stability_schedule:
            # Not the final scheduled checkup.  Take the given sample and
            # return.
            self.stability_samples[0].append((input_sdr, output_sdr))
            return
        # Else: calculate the stability and set up for the next period of
        # stability sampling & monitoring.
        assert(False)   # This method probably won't work since the changes to use the SDR class...

        def overlap(a, b):
            a = set(zip(*a))
            b = set(zip(*b))
            overlap = len(a.intersection(b))
            overlap_pct = overlap / min(len(a), len(b))
            return overlap_pct

        # Rerun the samples through the machine.
        try:
            st_samples = self.stability_samples[1]
        except IndexError:
            self.st_stability = None    # This happens when age < 2 x st_period.
        else:
            st_rerun = [self.compute(inp, learn=False) for inp, out in st_samples]
            self.st_stability = np.mean([overlap(re, io[1]) for re, io in zip(st_rerun, st_samples)])

        try:
            lt_samples = self.stability_samples[self.stability_lt_period]
        except IndexError:
            self.lt_stability = None    # This happens when age < st_period x (lt_period + 1).
        else:
            lt_rerun = [self.compute(inp, learn=False) for inp, out in lt_samples]
            self.lt_stability = np.mean([overlap(re, io[1]) for re, io in zip(lt_rerun, lt_samples)])

        # Make a new sampling schedule.
        sample_period = range(self.age + 1, self.age + self.stability_st_period)
        self.stability_schedule = random.sample(sample_period, self.stability_sample_size)
        # Add the next stability calculation to the end of the schedule.
        self.stability_schedule.append(sample_period.stop)
        self.stability_schedule.sort(reverse=True)
        # Roll the samples buffer.
        self.stability_samples.insert(0, [])
        self.stability_samples = self.stability_samples[:self.stability_lt_period + 1]

        # Print output.
        if diag:
            s = ""
            if self.st_stability is not None:
                s += "Stability (%d) %-5.03g" % (self.stability_st_period, self.st_stability,)
            if self.lt_stability is not None:
                s += " | (x%d) %-5.03g" % (self.stability_lt_period, self.lt_stability)
            if s:
                print(s)

    def noise_perturbation(self, inp, flip_bits, diag=False):
        """
        Measure the change in SDR overlap after moving some of the ON bits.
        """
        tru = self.compute(inp, learn=False)

        # Make the sparse input dense.
        if isinstance(inp, tuple) or inp.shape != self.args.input_dimensions:
            dense = np.zeros(self.args.input_dimensions)
            dense[inp] = True
            inp = dense

        # Move some of the on bits around.
        on_bits   = list(zip(*np.nonzero(inp)))
        off_bits  = list(zip(*np.nonzero(np.logical_not(inp))))
        flip_bits = min(flip_bits, min(len(on_bits), len(off_bits)))
        flip_off  = random.sample(on_bits, flip_bits)
        flip_on   = random.sample(off_bits, flip_bits)
        noisy = np.array(inp, dtype=bool)   # Force copy.
        noisy[list(zip(*flip_off))] = False
        noisy[list(zip(*flip_on))]  = True

        # Calculate the overlap in SP output after adding noise.
        near = self.compute(noisy, learn=False)
        tru  = set(zip(*tru))
        near = set(zip(*near))
        overlap     = len(tru.intersection(near))
        overlap_pct = overlap / len(tru)
        if diag:
            print("SP Noise Robustness (%d flipped) %g" % (flip_bits, overlap_pct))
        return overlap_pct

    def noise_robustness(self, inps, diag=False):
        """
        Plot the noise robustness as a function of the number of flipped
        bits.  Argument inps is a list of encoded inputs.
        """
        if False:
            # Range       Num Samples     Resolution
            # [0, 10)     20              .5
            # [10, 50)    40              1
            # [50, 100]   11              5
            noises = list(np.arange(20) / 2) + list(np.arange(10, 40)) + list(np.arange(11) * 5 + 50)
        elif False:
            # Exponential progression of noises, samples many orders of
            # magnitude of noise.
            num_samples = 50
            x = np.exp(np.arange(num_samples))
            noises = list(x * 100 / np.max(x))
        else:
            # Number of ON bits in the encoded input-space, +1.
            nz = int(round(np.mean([np.count_nonzero(s) for s in inps[:10]])))
            noises = list(np.arange(nz + 1))
            cutoff = len(noises) // 10          # First 'cutoff' many samples have full accuracy.
            while len(noises) > 50 + cutoff:    # Decimate to a sane number of sample points.
                noises = noises[:cutoff] + noises[cutoff::2]

        pct_over = []
        for n in noises:
            z = 0
            for inp in inps:
                z += self.noise_perturbation(inp, n, diag=False)
            pct_over.append(z / len(inps))

        if diag:
            from matplotlib import pyplot as plt
            plt.figure(1)
            plt.plot(noises, pct_over)
            plt.title('SP Noise Robustness')
            plt.xlabel('Number of flipped input bits')
            plt.ylabel('Mean overlap with noise-free output')
            plt.show()

        return noises, pct_over

    def statistics(self):
        stats  = 'SP '
        stats += self.proximal.statistics()

        if self.args.boosting_alpha is not None:
            stats += 'Columns ' + self.boosting.statistics()
            af         = self.boosting.activation_frequency
            boost_min  = np.log2(np.min(af))  / np.log2(self.args.sparsity)
            boost_mean = np.log2(np.mean(af)) / np.log2(self.args.sparsity)
            boost_max  = np.log2(np.max(af))  / np.log2(self.args.sparsity)
            stats += '\tLogarithmic Boosting Multiplier min/mean/max  {:-.04g}% / {:-.04g}% / {:-.04g}%\n'.format(
                boost_min  * 100,
                boost_mean * 100,
                boost_max  * 100,)

        # TODO: Stability, if enabled.
        # TODO: Noise robustness, if enabled.
        return stats
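
# The following is a minimal, self-contained sketch of the "Fast Local
# Inhibition" pseudo code from the SpatialPooler docstring above, included
# for illustration only.  It operates on a plain 2D numpy array of
# excitements, reuses this module's numpy and scipy.ndimage imports, and the
# function name is hypothetical, not part of the public API.
def _demo_fast_local_inhibition(excitement, radius, sparsity):
    # 1. Subtract the local (gaussian-weighted) mean from each column.
    mean_normalized = excitement - scipy.ndimage.gaussian_filter(
        excitement, radius, mode='reflect', truncate=3.0)
    # 2. Estimate each column's local standard deviation.
    stddev = np.sqrt(scipy.ndimage.gaussian_filter(
        mean_normalized ** 2, radius, mode='reflect', truncate=3.0))
    # 3. Normalize each column's excitement by its local neighborhood.
    normalized = np.nan_to_num(mean_normalized / stddev)
    # 4. Globally activate the top-k of the locally normalized excitements.
    k = max(1, int(round(sparsity * normalized.size)))
    flat = normalized.reshape(-1)
    return np.argpartition(-flat, k - 1)[:k]

# Example: activate 2% of a 50x50 grid of random excitements.
# >>> active = _demo_fast_local_inhibition(np.random.rand(50, 50), radius=4, sparsity=.02)
# >>> active.size    # -> 50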