Example #1
def main2():
    """
	Use one class SVM for multi-class classification
	
	Accuracy = 71.45%
	"""

    # Initializations
    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    tr, te = [], []
    for i in xrange(10):
        tr.append(np.random.permutation(tr_x[tr_y == i])[:ntrain])
        te.append(np.random.permutation(te_x[te_y == i])[:ntest])

    # Train the classifiers and get their results
    clfs = []
    for i in xrange(10):
        clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed)
        clf.fit(tr[i])
        clfs.append(clf)

    # Test the classifiers
    te_x = np.vstack(te)
    te_y = np.hstack([np.array([i] * ntest) for i in xrange(10)])
    results = np.zeros((10, len(te_y)))
    for i in xrange(10):
        results[i] = clfs[i].decision_function(te_x).flatten() + \
         np.random.uniform(0.1, 0.2, len(te_y))
    print np.sum(np.argmax(results, 0) == te_y) / float(len(te_y))
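
The scheme above fits one one-class SVM per digit and assigns each test sample to the class whose decision function scores highest. A minimal, self-contained sketch of the same argmax-over-decision-functions idea on synthetic data (without the random offset added above, and independent of load_mnist); every name and size below is illustrative:

import numpy as np
from sklearn.svm import OneClassSVM

# Toy stand-in for the setup above: 3 classes, 20-dimensional inputs
rng = np.random.RandomState(0)
centers = rng.uniform(-5, 5, size=(3, 20))
tr = [c + rng.normal(0, 0.5, size=(100, 20)) for c in centers]
te_x = np.vstack([c + rng.normal(0, 0.5, size=(30, 20)) for c in centers])
te_y = np.repeat(np.arange(3), 30)

# One one-class SVM per class, fit only on that class's samples
clfs = [OneClassSVM(kernel='linear', nu=0.1).fit(x) for x in tr]

# Score every test sample with every model and take the argmax
scores = np.vstack([clf.decision_function(te_x).ravel() for clf in clfs])
pred = np.argmax(scores, axis=0)
print('Accuracy: {0:.2%}'.format(np.mean(pred == te_y)))
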
Example #2
def full_cv(base_dir):
    """Run the MNIST experiment. Iterate over each CV.

    @param base_dir: The full path to the base directory. This directory should
    contain the config as well as the pickled data.
    """
    # Get the keyword arguments for the SP
    with open(os.path.join(base_dir, 'config.json'), 'r') as f:
        kargs = json.load(f)
    kargs['clf'] = LinearSVC(random_state=kargs['seed'])

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Get the CV splits
    with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
        cv = pickle.load(f)

    # Execute each run
    for tr, te in cv:
        clf = SPRegion(**kargs)
        clf.fit(x[tr], y[tr])

        # Column accuracy
        clf.score(x[te], y[te])

        # Probabilistic accuracy
        clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')

        # Dimensionality reduction method
        clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
        ndims = len(clf.reduce_dimensions(x[0]))
        clf._log_stats('Number of New Dimensions', ndims)
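
cv.pkl is assumed to hold a sequence of (train_indices, test_indices) pairs indexing into the stacked x/y arrays. A hypothetical sketch of producing such a file with scikit-learn; StratifiedShuffleSplit and the stand-in labels are illustrative, not the project's actual split generator:

import pickle

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

# Stand-in labels; in the experiment these would be the stacked MNIST labels
y = np.repeat(np.arange(10), 100)
sss = StratifiedShuffleSplit(n_splits=5, train_size=0.8, random_state=0)
cv = [(tr, te) for tr, te in sss.split(np.zeros((len(y), 1)), y)]

with open('cv.pkl', 'wb') as f:
    pickle.dump(cv, f, pickle.HIGHEST_PROTOCOL)
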
Example #3
def main():
    """
	Use a linear SVM for multi-class classification.
	
	One vs the rest : 77.61%
	Default         : 77.61%
	One vs one      : 85.07%
	"""

    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, 1, seed)

    for tr, te in cv:
        clf = OneVsRestClassifier(LinearSVC(random_state=seed), -1)
        clf.fit(x[tr], y[tr])
        print clf.score(x[te], y[te])

        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        print clf.score(x[te], y[te])

        clf = OneVsOneClassifier(LinearSVC(random_state=seed), -1)
        clf.fit(x[tr], y[tr])
        print clf.score(x[te], y[te])
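
Note that the second positional argument passed to OneVsRestClassifier and OneVsOneClassifier above is n_jobs; in recent scikit-learn releases it is keyword-only, so the equivalent calls would read:

from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier
from sklearn.svm import LinearSVC

seed = 123456789
ovr = OneVsRestClassifier(LinearSVC(random_state=seed), n_jobs=-1)
ovo = OneVsOneClassifier(LinearSVC(random_state=seed), n_jobs=-1)
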
Example #4
File: mnist.py  Project: johnrobinsn/mHTM
def score_grid():
	"""
	Classify with the gridded SP.
	"""
	
	p = 'results\\mnist_filter'
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	
	# Get the SPs
	sps = [load(os.path.join(p, sp)) for sp in os.listdir(p) if sp[2] == '0']
	sp2 = load(os.path.join(p, 'sp1-0.pkl'))
	
	nwindows = 26 ** 2
	nfeat = 100 * nwindows
	
	# w = [sp2.p[sp2.syn_map == j] for j in xrange(nfeat)]
	# ms = max(wi.shape[0] for wi in w)
	# with open(os.path.join(p, 'data.pkl'), 'wb') as f:
		# cPickle.dump((w, ms), f, cPickle.HIGHEST_PROTOCOL)
	with open(os.path.join(p, 'data.pkl'), 'rb') as f:
		w, ms = cPickle.load(f)
	
	# Get training data
	tr_x2 = np.zeros((tr_x.shape[0], nfeat))
	for i, x in enumerate(tr_x):
		nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(
			nwindows, 9)
		x = np.array(np.zeros(nfeat), dtype='bool')
		for j, (xi, sp) in enumerate(izip(nx, sps)):
			sp.step(xi)
			x[j*100:(j*100)+100] = sp.y[:, 0]
		
		y = sp2.p * x[sp2.syn_map]
		w = np.zeros((nfeat, ms))
		for j in xrange(nfeat):
			a = y[sp2.syn_map == j]
			w[j][:a.shape[0]] = a
		tr_x2[i] = np.mean(w, 1)
	
	# Get testing data
	te_x2 = np.zeros((te_x.shape[0], nfeat))
	for i, x in enumerate(te_x):
		nx = extract_patches_2d(x.reshape(28, 28), (3, 3)).reshape(
			nwindows, 9)
		x = np.array(np.zeros(nfeat), dtype='bool')
		for j, (xi, sp) in enumerate(izip(nx, sps)):
			sp.step(xi)
			x[j*100:(j*100)+100] = sp.y[:, 0]
		
		y = sp2.p * x[sp2.syn_map]
		w = np.zeros((nfeat, ms))
		for j in xrange(nfeat):
			a = y[sp2.syn_map == j]
			w[j][:a.shape[0]] = a
		te_x2[i] = np.mean(w, 1)
	
	# Classify
	clf = LinearSVC(random_state=123456789)
	clf.fit(tr_x2, tr_y)
	print 'SVM Accuracy : {0:2.2f} %'.format(clf.score(te_x2, te_y) * 100)
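
The window count above comes from scikit-learn's extract_patches_2d, which slides a 3x3 window over the 28x28 image and yields (28 - 3 + 1)^2 = 26^2 = 676 patches. A quick shape check, independent of the SP code:

import numpy as np
from sklearn.feature_extraction.image import extract_patches_2d

patches = extract_patches_2d(np.zeros((28, 28)), (3, 3))
print(patches.shape)                  # (676, 3, 3), i.e. 26 ** 2 windows
print(patches.reshape(-1, 9).shape)   # (676, 9), the layout used above
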
Example #5
def full_mnist(base_dir, new_dir, auto_update=False):
	"""
	Execute a full MNIST run using the parameters specified in the config.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config.
	
	@param new_dir: The full path of where the data should be saved.
	
	@param auto_update: If True the permanence increment and decrement amounts
	will automatically be computed by the runner. If False, the ones specified
	in the config file will be used.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['log_dir'] = new_dir
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()

	# Manually compute the permanence update amounts
	if auto_update:
		# Compute the sum of each training instance
		avg_s = tr_x.sum(1)
		
		# Compute the total average sum
		avg_ts = avg_s.mean()
		
		# Compute the average active probability
		a_p = avg_ts / float(tr_x.shape[1])
		
		# Compute the scaling factor
		scaling_factor = 1 / avg_ts
		
		# Compute the update amounts
		pinc = scaling_factor * (1 / a_p)
		pdec = scaling_factor * (1 / (1 - a_p))
		
		# Update the config
		kargs['pinc'], kargs['pdec'] = pinc, pdec
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(tr_x, tr_y)
	
	# Column accuracy
	clf.score(te_x, te_y)
	
	# Probabilistic accuracy
	clf.score(te_x, te_y, tr_x=tr_x, score_method='prob')
	
	# Dimensionality reduction method
	clf.score(te_x, te_y, tr_x=tr_x, score_method='reduction')
	ndims = len(clf.reduce_dimensions(tr_x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
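
The auto_update branch derives the permanence increment and decrement from the average number of active bits per training instance: with avg_ts active bits out of ninputs, a_p = avg_ts / ninputs, pinc = (1 / avg_ts) / a_p and pdec = (1 / avg_ts) / (1 - a_p). A small worked example on a toy binary dataset, using the same formulas:

import numpy as np

# Toy binary inputs: 5 samples, 10 features, 3 active bits each
tr_x = np.array([
    [1, 0, 1, 0, 0, 1, 0, 0, 0, 0],
    [0, 1, 1, 0, 0, 0, 0, 1, 0, 0],
    [1, 1, 0, 0, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 0, 1, 0, 0, 0],
    [1, 0, 0, 0, 0, 1, 0, 0, 1, 0]], dtype=float)

avg_ts = tr_x.sum(1).mean()               # mean active bits per sample: 3.0
a_p = avg_ts / tr_x.shape[1]              # average active probability: 0.3
scaling_factor = 1 / avg_ts               # 1/3
pinc = scaling_factor * (1 / a_p)         # ~1.111
pdec = scaling_factor * (1 / (1 - a_p))   # ~0.476
print(pinc, pdec)
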
Example #8
def main3(log_dir):
    """
	Use one class SP for multi-class classification
	
	Accuracy = 49.8%
	"""

    # Initializations
    seed = 123456789
    np.random.seed(seed)
    ntrain, ntest = 800, 200
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    tr, te = [], []
    for i in xrange(10):
        tr.append(np.random.permutation(tr_x[tr_y == i])[:ntrain])
        te.append(np.random.permutation(te_x[te_y == i])[:ntest])
    params = {
        'ninputs': 784,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'pct_active': None,
        'random_permanence': True,
        'pwindow': 0.5,
        'global_inhibition': True,
        'ncolumns': 784,
        'nactive': 78,
        'nsynapses': 100,
        'seg_th': 0,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'nepochs': 10,
        'log_dir': log_dir
    }
    metrics = SPMetrics()

    # Train the classifiers
    clfs = []
    base_results = []
    for clf, y in Parallel(n_jobs=-1)(delayed(_main3)(params, tr[i])
                                      for i in xrange(10)):
        clfs.append(clf)
        base_results.append(y)

    # Test the classifiers
    te_x = np.vstack(te)
    te_y = np.hstack([np.array([i] * ntest) for i in xrange(10)])
    results = np.array(
        Parallel(n_jobs=-1)(
            delayed(_main3_2)(clfs[i], te_x, base_results[i], seed)
            for i in xrange(10)))

    print np.sum(np.argmax(results, 0) == te_y) / float(len(te_y))
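
_main3 and _main3_2 are helper functions defined elsewhere in the script; the pattern itself is joblib's Parallel/delayed, which calls the wrapped function once per generated argument tuple and returns the results in submission order. A minimal illustration with a stand-in worker:

from joblib import Parallel, delayed

def train_one(label, data):
    # Stand-in for _main3: pretend to train and return (model, result)
    return 'clf-{0}'.format(label), sum(data)

results = Parallel(n_jobs=2)(delayed(train_one)(i, range(i + 1)) for i in range(4))
print(results)   # [('clf-0', 0), ('clf-1', 1), ('clf-2', 3), ('clf-3', 6)]
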
Example #9
def one_cv(base_dir, cv_split):
	"""
	Run the MNIST experiment. Only the specified CV split is executed.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	
	@param cv_split: The index for the CV split.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config-{0}.json'.format(cv_split)),
		'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	tr, te = cv[cv_split - 1]
	
	# Remove the split directory, if it exists
	shutil.rmtree(os.path.join(base_dir, str(cv_split)), True)
	
	# Execute
	clf = SPRegion(**kargs)
	clf.fit(x[tr], y[tr])
	
	# Column accuracy
	clf.score(x[te], y[te])
	
	# Probabilistic accuracy
	clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
	
	# Dimensionality reduction method
	clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
	ndims = len(clf.reduce_dimensions(x[0]))
	clf._log_stats('Number of New Dimensions', ndims)
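
The bare True passed to shutil.rmtree above is its ignore_errors argument, so the call silently does nothing when the split directory does not exist. Written with the keyword, on an illustrative path:

import os
import shutil

base_dir, cv_split = '/tmp/mnist_runs', 1   # illustrative values
shutil.rmtree(os.path.join(base_dir, str(cv_split)), ignore_errors=True)
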
Example #11
def full_cv(base_dir):
	"""
	Run the MNIST experiment. Each CV split is executed sequentially.
	
	@param base_dir: The full path to the base directory. This directory should
	contain the config as well as the pickled data.
	"""
	
	# Get the keyword arguments for the SP
	with open(os.path.join(base_dir, 'config.json'), 'rb') as f:
		kargs = json.load(f)
	kargs['clf'] = LinearSVC(random_state=kargs['seed'])
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Get the CV splits
	with open(os.path.join(base_dir, 'cv.pkl'), 'rb') as f:
		cv = cPickle.load(f)
	
	# Execute each run
	for tr, te in cv:
		clf = SPRegion(**kargs)
		clf.fit(x[tr], y[tr])
		
		# Column accuracy
		clf.score(x[te], y[te])
		
		# Probabilistic accuracy
		clf.score(x[te], y[te], tr_x=x[tr], score_method='prob')
		
		# Dimensionality reduction method
		clf.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
		ndims = len(clf.reduce_dimensions(x[0]))
		clf._log_stats('Number of New Dimensions', ndims)
Example #12
def main(ntrain=800, ntest=200, nsplits=1, seed=1234567):
    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 10,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for i, (tr, te) in enumerate(cv):
        # Create the region
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])

    # Get a random set of unique inputs from the training set
    inputs = np.zeros((10, ninputs))
    for i in xrange(10):
        ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
        inputs[i] = x[tr][ix]

    # Get the SP's predictions for the inputs
    sp_pred = sp.predict(inputs)

    # Get the reconstruction in the context of the SP
    sp_inputs = sp.reconstruct_input(sp_pred)

    # Make a plot comparing the images
    shape = (28, 28)
    path = os.path.join(sp.log_dir, 'input_reconstruction.png')
    plot_compare_images((inputs, sp_pred, sp_inputs), shape, out_path=path)
Example #13
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
	# Set the configuration parameters for the SP
	ninputs = 784
	kargs = {
		'ninputs': ninputs,
		'ncolumns': ninputs,
		'nactive': 20,
		'global_inhibition': True,
		'trim': False,
		'seed': seed,
		
		'max_boost': 3,
		'duty_cycle': 8,
		
		'nsynapses': 392,
		'seg_th': 2,
		
		'syn_th': 0.5,
		'pinc': 0.01,
		'pdec': 0.02,
		'pwindow': 0.5,
		'random_permanence': True,
		
		'nepochs': 1,
		'clf': LinearSVC(random_state=seed),
		'log_dir': os.path.join('simple_mnist', '1-1')
	}
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
	
	# Split the data for CV
	cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)
	
	# Execute the SP on each fold. Additionally, get results for each fitting
	# method.
	for i, (tr, te) in enumerate(cv):
		# Create the region
		sp = SPRegion(**kargs)
		
		# Train the region
		sp.fit(x[tr], y[tr])
		
		# Test the base classifier
		clf = LinearSVC(random_state=seed)
		clf.fit(x[tr], y[tr])
		score = clf.score(x[te], y[te])
		print 'SVM Only Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the column method
		score = sp.score(x[te], y[te])
		print 'Column Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the probabilistic method
		score = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
		print 'Probabilistic Accuracy: {0:.2f}%'.format(score * 100)
		
		# Test the region for the dimensionality reduction method
		score = sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
		ndims = len(sp.reduce_dimensions(x[0]))
		print 'Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
			ninputs, ndims, ninputs / float(ndims))
		print 'Reduction Accuracy: {0:.2f}%'.format(score * 100)
	
	# Get a random set of unique inputs from the training set
	inputs = np.zeros((10, ninputs))
	for i in xrange(10):
		ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
		inputs[i] = x[tr][ix]
	
	# Get the SP's predictions for the inputs
	sp_pred = sp.predict(inputs)
	
	# Get the reconstruction in the context of the SP
	sp_inputs = sp.reconstruct_input(sp_pred)
	
	# Make a plot comparing the two
	x1_labels = [str(i) for i in xrange(10)]
	x2_labels = [str(i) for i in xrange(10)]
	title = 'Input Reconstruction: Original (top), SP (bottom)'
	shape = (28, 28)
	path = os.path.join(sp.log_dir, 'input_reconstruction.png')
	plot_compare_images((inputs, sp_inputs), shape, title, (x1_labels,
		x2_labels,), path)
Example #14
def main(ntrain=800, ntest=200, nsplits=1, seed=123456789):
    """Run a simple MNIST classification task."""
    # Set the configuration parameters for the SP
    ninputs = 784
    kargs = {
        'ninputs': ninputs,
        'ncolumns': ninputs,
        'nactive': 30,
        'global_inhibition': True,
        'trim': False,
        'seed': seed,
        'disable_boost': True,
        'nsynapses': 392,
        'seg_th': 10,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.002,
        'pwindow': 0.01,
        'random_permanence': True,
        'nepochs': 10,
        'clf': LinearSVC(random_state=seed),
        'log_dir': os.path.join('simple_mnist', '1-1')
    }

    # Seed numpy
    np.random.seed(seed)

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Execute the SP on each fold. Additionally, get results for each fitting
    # method.
    for i, (tr, te) in enumerate(cv):
        # Create the region
        sp = SPRegion(**kargs)

        # Train the region
        sp.fit(x[tr], y[tr])

        # Test the base classifier
        clf = LinearSVC(random_state=seed)
        clf.fit(x[tr], y[tr])
        score = clf.score(x[te], y[te])
        print('SVM Only Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the column method
        score = sp.score(x[te], y[te])
        print('Column Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the probabilistic method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='prob')
        print('Probabilistic Accuracy: {0:.2f}%'.format(score * 100))

        # Test the region for the dimensionality reduction method
        score = sp.score(x[te], y[te], tr_x=x[tr], score_method='reduction')
        ndims = len(sp.reduce_dimensions(x[0]))
        print('Input Reduced from {0} to {1}: {2:.1f}X reduction'.format(
            ninputs, ndims, ninputs / float(ndims)))
        print('Reduction Accuracy: {0:.2f}%'.format(score * 100))

    # Get a random set of unique inputs from the training set
    inputs = np.zeros((10, ninputs))
    for i in range(10):
        ix = np.random.permutation(np.where(y[tr] == i)[0])[0]
        inputs[i] = x[tr][ix]

    # Get the SP's predictions for the inputs
    sp_pred = sp.predict(inputs)

    # Get the reconstruction in the context of the SP
    sp_inputs = sp.reconstruct_input(sp_pred)

    # Make a plot comparing the images
    title = 'Input Reconstruction: Original (top), SP SDRs (middle), ' \
        'SP Reconstruction (bottom)'
    shape = (28, 28)
    path = os.path.join(sp.log_dir, 'input_reconstruction.png')
    plot_compare_images((inputs, sp_pred, sp_inputs),
                        shape,
                        title,
                        out_path=path)
Example #15
def main(log_dir,
         ntrain=800,
         ntest=200,
         niter=10,
         nsplits=5,
         global_inhibition=True,
         seed=None):
    """
	Build the information needed to perform CV on a subset of the MNIST
	dataset.

	@param log_dir: The directory to store the results in.
	
	@param ntrain: The number of training samples to use.
	
	@param ntest: The number of testing samples to use.
	
	@param niter: The number of parameter iterations to use.
	
	@param nsplits: The number of splits of the data to use.
	
	@param global_inhibition: If True use global inhibition; otherwise, use
	local inhibition.
	
	@param seed: The seed for the random number generators.
	
	@return: The full set of X, the full set of Y, the keyword arguments for
	the classifier, the params for CV, and the CV.
	"""

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))
    cv = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed)

    # Create static parameters
    ninputs = tr_x.shape[1]
    kargs = {
        # Region parameters
        'ninputs': ninputs,
        'global_inhibition': global_inhibition,
        'trim': 1e-4,
        'seed': seed,

        # Synapse parameters
        'syn_th': 0.5,
        'random_permanence': True,

        # Fitting parameters
        'nepochs': 30,
        'clf': LinearSVC(random_state=seed)
        # NOTE: The SVMs will be identical, despite being seeded now
    }

    # Come up with some parameters to search
    param_distributions = {
        # Region parameters
        'ncolumns': randint(100, 1001),
        'nactive': uniform(0, 0.2),
        # As a percentage of the number of columns

        # Column parameters
        'max_boost': randint(1, 21),
        'duty_cycle': randint(10, 1001),

        # Segment parameters
        'nsynapses': randint(1, ninputs + 1),
        'seg_th': uniform(0, 0.1),
        # As a percentage of the number of synapses

        # Synapse parameters
        'pinc': uniform(0.001, 0.1),
        'pdec': uniform(0.001, 0.1),
        'pwindow': uniform(0.001, 0.1),

        # Fitting parameters
        'log_dir': log_dir
    }

    # Build the parameter generator
    gen = ParamGenerator(param_distributions, niter, nsplits, ninputs)
    params = {key: gen for key in param_distributions}

    return x, y, kargs, params, cv
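
The randint and uniform objects above are assumed to be scipy.stats frozen distributions (the same kind a RandomizedSearchCV-style parameter search expects), each of which exposes rvs() for sampling. A minimal sketch of drawing one candidate setting, independent of ParamGenerator:

from scipy.stats import randint, uniform

param_distributions = {
    'ncolumns': randint(100, 1001),   # integers in [100, 1000]
    'nactive': uniform(0, 0.2),       # floats in [0, 0.2) (loc=0, scale=0.2)
    'pinc': uniform(0.001, 0.1),      # floats in [0.001, 0.101)
}
sample = {k: v.rvs(random_state=0) for k, v in param_distributions.items()}
print(sample)
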
Example #16
def first_level(log_dir, ntrain=800, ntest=200, nsplits=1, seed=123456789):
    # Details of the filter
    win_size = 7
    total_win_size = win_size * win_size
    nwindows = 16

    # SP arguments
    kargs = {
        'ninputs': total_win_size,
        'ncolumns': 200,
        'nactive': 50,
        'global_inhibition': True,
        'trim': 1e-4,
        'disable_boost': True,
        'seed': seed,
        'nsynapses': 35,
        'seg_th': 5,
        'syn_th': 0.5,
        'pinc': 0.001,
        'pdec': 0.001,
        'pwindow': 0.5,
        'random_permanence': True,
        'nepochs': 10,
        'log_dir': os.path.join(log_dir, '1-1')
    }

    # Get the data
    (tr_x, tr_y), (te_x, te_y) = load_mnist()
    x, y = np.vstack((tr_x, te_x)), np.hstack((tr_y, te_y))

    # Split the data for CV
    tr, te = MNISTCV(tr_y, te_y, ntrain, ntest, nsplits, seed).gen.next()
    tr, te = tr[:ntrain], te[:ntest]

    # Store the labels to disk
    with open(os.path.join(log_dir, 'labels.pkl'), 'wb') as f:
        cPickle.dump((y[tr], y[te]), f, cPickle.HIGHEST_PROTOCOL)
    del tr_y
    del te_y
    del y

    # Build the training data
    train_data = np.zeros((nwindows, ntrain, total_win_size), dtype='bool')
    for i in xrange(ntrain):
        xi = x[tr[i]]
        for j, window in enumerate(get_windows(xi.reshape(28, 28), win_size)):
            train_data[j, i] = window

    # Build the testing data
    test_data = np.zeros((nwindows, ntest, total_win_size), dtype='bool')
    for i in xrange(ntest):
        xi = x[te[i]]
        for j, window in enumerate(get_windows(xi.reshape(28, 28), win_size)):
            test_data[j, i] = window
    del tr_x
    del te_x
    del x

    # Make the SPs
    sps = [SPRegion(**kargs) for _ in xrange(nwindows)]

    # Execute the SPs in parallel
    Parallel(n_jobs=-1)(delayed(execute)(sp, tr, te)
                        for sp, tr, te in izip(sps, train_data, test_data))
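
get_windows is not shown in this listing; given win_size = 7 and nwindows = 16, a plausible reading is that it tiles the 28x28 image into the 4 x 4 = 16 non-overlapping 7x7 blocks and yields each block flattened. A hypothetical sketch of such a helper (an assumption, not the project's implementation):

import numpy as np

def get_windows(img, win_size):
    # Hypothetical: yield flattened, non-overlapping win_size x win_size blocks
    nrows, ncols = img.shape
    for r in range(0, nrows, win_size):
        for c in range(0, ncols, win_size):
            yield img[r:r + win_size, c:c + win_size].ravel()

windows = list(get_windows(np.zeros((28, 28), dtype='bool'), 7))
print(len(windows), windows[0].shape)   # 16 (49,)
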
Example #17
def base_experiment(config, ntrials=1, seed=123456789):
	"""
	Run a single experiment, locally.
		
	@param config: The configuration parameters to use for the SP.
	
	@param ntrials: The number of times to repeat the experiment.
	
	@param seed: The random seed to use.
	
	@return: A tuple containing the percentage errors for the SP's training
	and testing results and the SVM's training and testing results,
	respectively.
	"""
	
	# Base parameters
	ntrain, ntest = 800, 200
	clf_th = 0.5
	
	# Seed numpy
	np.random.seed(seed)
	
	# Get the data
	(tr_x, tr_y), (te_x, te_y) = load_mnist()
	tr_x_0 = np.random.permutation(tr_x[tr_y == 0])
	x_tr = tr_x_0[:ntrain]
	x_te = tr_x_0[ntrain:ntrain + ntest]
	outliers = [np.random.permutation(tr_x[tr_y == i])[:ntest] for i in
		xrange(1, 10)]
	
	# Metrics
	metrics = SPMetrics()
	
	# Get the metrics for the datasets
	u_x_tr = metrics.compute_uniqueness(x_tr)
	o_x_tr = metrics.compute_overlap(x_tr)
	c_x_tr = 1 - metrics.compute_distance(x_tr)
	u_x_te = metrics.compute_uniqueness(x_te)
	o_x_te = metrics.compute_overlap(x_te)
	c_x_te = 1 - metrics.compute_distance(x_te)
	u_y_te, o_y_te, c_y_te = [], [], []
	for outlier in outliers:
		u_y_te.append(metrics.compute_uniqueness(outlier))
		o_y_te.append(metrics.compute_overlap(outlier))
		c_y_te.append(1 - metrics.compute_distance(outlier))
	
	# Initialize the overall results
	sp_x_results = np.zeros(ntrials)
	sp_y_results = [np.zeros(ntrials) for _ in xrange(9)]
	svm_x_results = np.zeros(ntrials)
	svm_y_results = [np.zeros(ntrials) for _ in xrange(9)]
	
	# Iterate across the trials:
	for nt in xrange(ntrials):
		# Make a new seed
		seed2 = np.random.randint(1000000)
		config['seed'] = seed2
		
		# Create the SP
		sp = SPRegion(**config)
		
		# Fit the SP
		sp.fit(x_tr)
		
		# Get the SP's output
		sp_x_tr = sp.predict(x_tr)
		sp_x_te = sp.predict(x_te)
		sp_y_te = [sp.predict(outlier) for outlier in outliers]
		
		# Get the metrics for the SP's results
		u_sp_x_tr = metrics.compute_uniqueness(sp_x_tr)
		o_sp_x_tr = metrics.compute_overlap(sp_x_tr)
		c_sp_x_tr = 1 - metrics.compute_distance(sp_x_tr)
		u_sp_x_te = metrics.compute_uniqueness(sp_x_te)
		o_sp_x_te = metrics.compute_overlap(sp_x_te)
		c_sp_x_te = 1 - metrics.compute_distance(sp_x_te)
		u_sp_y_te, o_sp_y_te, c_sp_y_te = [], [], []
		for y in sp_y_te:
			u_sp_y_te.append(metrics.compute_uniqueness(y))
			o_sp_y_te.append(metrics.compute_overlap(y))
			c_sp_y_te.append(1 - metrics.compute_distance(y))
		
		# Log all of the metrics
		sp._log_stats('Input Base Class Train Uniqueness', u_x_tr)
		sp._log_stats('Input Base Class Train Overlap', o_x_tr)
		sp._log_stats('Input Base Class Train Correlation', c_x_tr)
		sp._log_stats('Input Base Class Test Uniqueness', u_x_te)
		sp._log_stats('Input Base Class Test Overlap', o_x_te)
		sp._log_stats('Input Base Class Test Correlation', c_x_te)
		sp._log_stats('SP Base Class Train Uniqueness', u_sp_x_tr)
		sp._log_stats('SP Base Class Train Overlap', o_sp_x_tr)
		sp._log_stats('SP Base Class Train Correlation', c_sp_x_tr)
		sp._log_stats('SP Base Class Test Uniqueness', u_sp_x_te)
		sp._log_stats('SP Base Class Test Overlap', o_sp_x_te)
		sp._log_stats('SP Base Class Test Correlation', c_sp_x_te)
		for i, (a, b, c, d, e, f) in enumerate(zip(u_y_te, o_y_te, c_y_te,
			u_sp_y_te, o_sp_y_te, c_sp_y_te), 1):
			sp._log_stats('Input Novelty Class {0} Uniqueness'.format(i), a)
			sp._log_stats('Input Novelty Class {0} Overlap'.format(i), b)
			sp._log_stats('Input Novelty Class {0} Correlation'.format(i), c)	
			sp._log_stats('SP Novelty Class {0} Uniqueness'.format(i), d)
			sp._log_stats('SP Novelty Class {0} Overlap'.format(i), e)
			sp._log_stats('SP Novelty Class {0} Correlation'.format(i), f)
		
		# Get average representation of the base class
		sp_base_result = np.mean(sp_x_tr, 0)
		sp_base_result[sp_base_result >= 0.5] = 1
		sp_base_result[sp_base_result < 1] = 0
		
		# Averaged results for each metric type
		u_sp_base_to_x_te = 0.
		o_sp_base_to_x_te = 0.
		c_sp_base_to_x_te = 0.
		u_sp, o_sp, c_sp = np.zeros(9), np.zeros(9), np.zeros(9)
		for i, x in enumerate(sp_x_te):
			xt = np.vstack((sp_base_result, x))
			u_sp_base_to_x_te += metrics.compute_uniqueness(xt)
			o_sp_base_to_x_te += metrics.compute_overlap(xt)
			c_sp_base_to_x_te += 1 - metrics.compute_distance(xt)
			
			for j, yi in enumerate(sp_y_te):
				yt = np.vstack((sp_base_result, yi[i]))
				u_sp[j] += metrics.compute_uniqueness(yt)
				o_sp[j] += metrics.compute_overlap(yt)
				c_sp[j] += 1 - metrics.compute_distance(yt)
		u_sp_base_to_x_te /= ntest
		o_sp_base_to_x_te /= ntest
		c_sp_base_to_x_te /= ntest
		for i in xrange(9):
			u_sp[i] /= ntest
			o_sp[i] /= ntest
			c_sp[i] /= ntest
		
		# Log the results
		sp._log_stats('Base Train to Base Test Uniqueness',
			u_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Overlap', o_sp_base_to_x_te)
		sp._log_stats('Base Train to Base Test Correlation', c_sp_base_to_x_te)
		for i, j in enumerate(xrange(1, 10)):
			sp._log_stats('Base Train to Novelty {0} Uniqueness'.format(j),
				u_sp[i])
			sp._log_stats('Base Train to Novelty {0} Overlap'.format(j),
				o_sp[i])
			sp._log_stats('Base Train to Novelty {0} Correlation'.format(j),
				c_sp[i])
		
		# Create an SVM
		clf = OneClassSVM(kernel='linear', nu=0.1, random_state=seed2)
		
		# Evaluate the SVM's performance
		clf.fit(x_tr)
		svm_x_te = len(np.where(clf.predict(x_te) == 1)[0]) / float(ntest) * \
			100
		svm_y_te = np.array([len(np.where(clf.predict(outlier) == -1)[0]) /
			float(ntest) * 100 for outlier in outliers])
		
		# Perform classification using overlap as the feature
		# -- The overlap must be above 50%
		clf_x_te = 0.
		clf_y_te = np.zeros(9)
		for i, x in enumerate(sp_x_te):
			xt = np.vstack((sp_base_result, x))
			xo = metrics.compute_overlap(xt)
			if xo >= clf_th: clf_x_te += 1
			
			for j, yi in enumerate(sp_y_te):
				yt = np.vstack((sp_base_result, yi[i]))
				yo = metrics.compute_overlap(yt)
				if yo < clf_th: clf_y_te[j] += 1
		clf_x_te = (clf_x_te / ntest) * 100
		clf_y_te = (clf_y_te / ntest) * 100
		
		# Store the results as errors (per trial for the base class, per class
		# and per trial for the novelty classes)
		sp_x_results[nt] = 100 - clf_x_te
		svm_x_results[nt] = 100 - svm_x_te
		for i in xrange(9):
			sp_y_results[i][nt] = 100 - clf_y_te[i]
			svm_y_results[i][nt] = 100 - svm_y_te[i]
		
		# Log the results
		sp._log_stats('SP % Correct Base Class', clf_x_te)
		sp._log_stats('SVM % Correct Base Class', svm_x_te)
		for i, j in enumerate(xrange(1, 10)):
			sp._log_stats('SP % Correct Novelty Class {0}'.format(j),
				clf_y_te[i])
			sp._log_stats('SVM % Correct Novelty Class {0}'.format(j),
				svm_y_te[i])
		sp._log_stats('SP % Mean Correct Novelty Class', np.mean(clf_y_te))
		sp._log_stats('SVM % Mean Correct Novelty Class', np.mean(svm_y_te))
		sp._log_stats('SP % Adjusted Score', (np.mean(clf_y_te) * clf_x_te) /
			100)
		sp._log_stats('SVM % Adjusted Score', (np.mean(svm_y_te) * svm_x_te) /
			100)
	
	return sp_x_results, sp_y_results, svm_x_results, svm_y_results
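
The threshold classifier at the end of the loop labels a test SDR as the base class when its overlap with the averaged base-class SDR is at least clf_th (0.5), and as novel otherwise. A minimal sketch of that decision rule, using a simple shared-active-bit fraction in place of SPMetrics.compute_overlap (an assumption about what that metric measures):

import numpy as np

def overlap(a, b):
    # Simplified stand-in: fraction of the reference SDR's active bits also active in b
    return np.sum(a & b) / float(max(np.sum(a), 1))

clf_th = 0.5
base = np.array([1, 1, 1, 0, 0, 0, 1, 0], dtype=bool)
same_class = np.array([1, 1, 0, 0, 0, 0, 1, 0], dtype=bool)   # overlap 0.75
novel = np.array([0, 0, 0, 1, 1, 1, 0, 1], dtype=bool)        # overlap 0.0

for name, x in (('same', same_class), ('novel', novel)):
    print(name, 'base class' if overlap(base, x) >= clf_th else 'novelty')
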