def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=int, default=5,
        help='Number of times to run through the training data.')
    parser.add_argument('--dataset', choices=('states', 'dictionary'),
        default='states')
    args = parser.parse_args(args=argv)

    # Load data.
    if args.dataset == 'states':
        dataset = state_names
        if verbose:
            print("Dataset is %d state names" % len(dataset))
    elif args.dataset == 'dictionary':
        dataset = read_dictionary()
        dataset = random.sample(dataset, 500)
        if verbose:
            print("Dataset is dictionary words, sample size %d" % len(dataset))
    dataset   = sorted(dataset)
    word_ids  = {word: idx for idx, word in enumerate(dataset)}
    confusion = np.zeros((len(dataset), len(dataset)))
    if verbose:
        print("Dataset: " + ", ".join('%d) %s' % idx_word for idx_word in enumerate(dataset)))

    # Construct the model: encoder -> spatial pooler -> temporal memory -> classifier.
    diagnostics_alpha = parameters['sp']['boosting_alpha']
    enc = EnumEncoder(**parameters['enc'])
    enc.output_sdr = SDR(enc.output_sdr, average_overlap_alpha=diagnostics_alpha)
    sp = SpatialPooler(
        input_sdr=enc.output_sdr,
        **parameters['sp'])
    tm = TemporalMemory(
        column_sdr=sp.columns,
        anomaly_alpha=diagnostics_alpha,
        **parameters['tm'])
    sdrc = SDRClassifier(steps=[0], **parameters['tm_sdrc'])
    # Initialize the classifier's internal table with the largest bucket index.
    sdrc.compute(-1, [tm.active.size - 1],
        classification={"bucketIdx": [len(dataset) - 1], "actValue": [len(dataset) - 1]},
        learn=True, infer=False)

    def reset():
        enc.output_sdr.zero()
        sp.reset()
        tm.reset()

    # Train.
    if verbose:
        train_cycles = args.time * sum(len(w) for w in dataset)
        print("Training for %d cycles (%d dataset iterations)" % (train_cycles, args.time))
    for i in range(args.time):
        random.shuffle(dataset)
        for word in dataset:
            reset()
            for char in word:
                enc.encode(char)
                sp.compute()
                tm.compute()
            lbl = word_ids[word]
            sdrc.compute(tm.age, tm.learning.flat_index,
                classification={"bucketIdx": lbl, "actValue": lbl},
                learn=True, infer=False)

    if verbose:
        print("Encoder", enc.output_sdr.statistics())
        print(sp.statistics())
        print(tm.statistics())

    # Test.
    score = 0.
    score_samples = 0
    for word in dataset:
        reset()
        for char in word:
            enc.encode(char)
            sp.compute(learn=False)
            tm.compute(learn=False)
        inference = sdrc.infer(tm.active.flat_index, None)
        lbl = word_ids[word]
        if lbl == np.argmax(inference[0]):
            score += 1
        score_samples += 1
        confusion[lbl] += inference[0]
    print("Score:", 100. * score / score_samples, '%')

    if synapses_debug:
        tm.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    if verbose:
        import matplotlib.pyplot as plt
        plt.figure('Confusion Matrix')
        plt.imshow(confusion, interpolation='nearest')
        plt.xlabel('Prediction')
        plt.ylabel('Label')
        plt.show()
    return score / score_samples
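# The experiment above unpacks `parameters` into each component, so
# `default_parameters` is expected to be a nested dict keyed by 'enc', 'sp',
# 'tm', and 'tm_sdrc'. A minimal sketch of that shape follows; aside from
# 'boosting_alpha' (which is read directly above), the contents shown are
# hypothetical placeholders, not the tuned values this experiment ships with.
#
# default_parameters = {
#     'enc':     {...},                       # EnumEncoder keyword arguments.
#     'sp':      {'boosting_alpha': 0.001,    # Read above for diagnostics.
#                 ...},                       # Remaining SpatialPooler kwargs.
#     'tm':      {...},                       # TemporalMemory kwargs.
#     'tm_sdrc': {...},                       # SDRClassifier kwargs.
# }
#
# Invocation sketch, reusing the argparse flags defined above:
#     main(argv=['--dataset', 'dictionary', '-t', '3'])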
def main(parameters=default_parameters, argv=None, verbose=True):
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--time', type=float, default=1,
        help='Number of times to run through the training data.')
    parser.add_argument('--debug', action='store_true')
    args = parser.parse_args(args=argv)

    # Load data.
    train_labels, train_images, test_labels, test_images = load_mnist()

    if False:
        # Experiment to verify that input dimensions are handled correctly.
        # If you enable this, don't forget to rescale the radii as well as
        # the input.
        from scipy.ndimage import zoom
        new_sz = (1, 4, 1)
        train_images = [zoom(im, new_sz, order=0) for im in train_images]
        test_images  = [zoom(im, new_sz, order=0) for im in test_images]

    training_data = list(zip(train_images, train_labels))
    test_data     = list(zip(test_images, test_labels))
    random.shuffle(training_data)
    random.shuffle(test_data)
    if args.debug and args.time < 1:
        test_data = test_data[:int(len(test_data) * args.time)]

    # Setup spatial pooler machine.
    enc = BWImageEncoder(train_images[0].shape[:2])
    sp = SpatialPooler(input_sdr=enc.output, segments=1, **parameters)
    sdrc = SDRClassifier(steps=[0])
    if verbose:
        print(sp.statistics())

    # Training Loop
    train_cycles = len(train_images) * args.time
    if verbose:
        print("Training for %d cycles" % train_cycles)
    for i in range(int(round(train_cycles))):
        sp.reset()
        img, lbl = random.choice(training_data)
        img = synthesize(img, diag=False)
        enc.encode(np.squeeze(img))
        sp.compute()
        sdrc.compute(i, sp.columns.flat_index,
            classification={"bucketIdx": lbl, "actValue": lbl},
            learn=True, infer=False)

    if verbose:
        print("Done training.")
        print("")
        print("Removing zero permanence synapses.")
        sp.synapses.remove_zero_permanence_synapses()
        print(sp.statistics())

    # Testing Loop
    if verbose:
        print("Testing for %d cycles." % len(test_data))
    score = 0
    for img, lbl in test_data:
        enc.encode(np.squeeze(img))
        sp.compute(learn=False)
        try:
            inference = sdrc.infer(sp.columns.flat_index, None)[0]
        except IndexError:
            inference = np.zeros(10)
        if lbl == np.argmax(inference):
            score += 1
    print('Score:', 100 * score / len(test_data), '%')

    if synapses_debug:
        sp.synapses.check_data_integrity()
        print("Synapse data structure integrity is OK.")

    return score / len(test_data)
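# A minimal entry point, assuming each of these experiments lives in its own
# module: main() already falls back to sys.argv when argv is None, so no extra
# argument wiring is needed here.
if __name__ == '__main__':
    main()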