예제 #1
0
#!/usr/bin/env python

import matplotlib.pyplot as pl
import numpy as np
from utils import util
from matplotlib.patches import Ellipse

# Load the 'heightWeightData' table from the .mat file.
data = util.load_mat('heightWeight/heightWeight')
data = data['heightWeightData']

# Column 0 carries the sex code; columns 1 and 2 are the plotted x and y.
sex, x, y = (data[:, col] for col in range(3))

# Boolean masks selecting the rows coded 1 ("male") and 2 ("female").
male_arg = sex == 1
female_arg = sex == 2
x_male, y_male = x[male_arg], y[male_arg]
x_female, y_female = x[female_arg], y[female_arg]

# Scatter both groups on one axes (blue crosses / red circles) and save it.
fig = pl.figure()
ax = fig.add_subplot(111)
ax.plot(x_male, y_male, 'bx')
ax.plot(x_female, y_female, 'ro')
pl.savefig('gaussHeightWeight_1.png')


def draw_ell(cov, xy, color):
    u, v = np.linalg.eigh(cov)
    angle = np.arctan2(v[0][1], v[0][0])
    angle = (180 * angle / np.pi)
    # here we time u2 with 5, assume 95% are in this ellipse
예제 #2
0
#!/usr/bin/env python

import numpy as np
import matplotlib.pyplot as pl
import utils.util as util
from scipy import ndimage

# Load the data set; `.toarray()` densifies the matrix (presumably a scipy
# sparse matrix) and the transpose lines axis 0 up with `newsgroups`.
data = util.load_mat('20news_w100')
documents = data['documents'].toarray().T
newsgroups = data['newsgroups'][0]

# Keep the 1000 rows with the largest row sums, ordered largest first.
chosen_docs_arg = np.argsort(documents.sum(axis=1))[-1000:][::-1]
documents, newsgroups = documents[chosen_docs_arg], newsgroups[chosen_docs_arg]

# Reorder the kept rows so that rows with the same newsgroup label are adjacent.
sorted_arg = np.argsort(newsgroups)
documents, newsgroups = documents[sorted_arg], newsgroups[sorted_arg]

# Stretch the second axis by a factor of 10 so the image is wide enough to read.
image = ndimage.zoom(documents, (1, 10))
pl.imshow(image, cmap=pl.cm.gray, interpolation='none')

# Draw a red line between different newsgroups: for every label after the
# first, y is the index of the first row carrying that label.
groups_label = np.unique(newsgroups)
for i in range(len(groups_label) - 1):
    y = np.where(newsgroups == groups_label[i + 1])[0][0]
예제 #3
0
#!/usr/bin/env python

# Fit logistic model to SAT scores.

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util
from scipy.special import logit
from sklearn.linear_model import LogisticRegressionCV

# Load the SAT data set from the .mat file.
data = util.load_mat('sat/sat.mat')
sat = data['sat']

# Column index 3 contains the SAT scores (the single feature); column index 0
# is used as the target.
X, y = sat[:, 3], sat[:, 0]
X = X.reshape((len(X), 1))
y = y.reshape((len(X), 1))

logistic = LogisticRegressionCV()
# Call form of print: valid on both Python 2 and Python 3 (the bare
# `print X` statement is a SyntaxError on Python 3).
print(X)
# scikit-learn expects a 1-D target; passing the flattened view avoids the
# column-vector conversion warning while `y` keeps its shape for plotting.
model = logistic.fit(X, y.ravel())

# Solve for the decision boundary: the score x where a * x + b = logit(0.5),
# i.e. where the predicted probability is exactly 0.5.
a = model.coef_
b = model.intercept_
threshold = (logit(0.5) - b) / a

# Plot the raw targets (black), the fitted probabilities (red) and two blue
# crosses at score 525 on the y=0 and y=1 levels.
pl.axis([450, 655, -.05, 1.05])
pl.plot(X, y, 'ko')
pl.plot(X, model.predict_proba(X)[:, 1], 'ro')
pl.plot(525, 0, 'bx', linewidth=2, markersize=14)
pl.plot(525, 1, 'bx', linewidth=2, markersize=14)
예제 #4
0
#!/usr/bin/env python

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util
from sklearn.naive_bayes import MultinomialNB

# Load the training matrix and its labels.
data = util.load_mat('XwindowsDocData')
xtrain, ytrain = data['xtrain'], data['ytrain']

# Fit a multinomial naive Bayes model; the labels are flattened to 1-D first.
clf = MultinomialNB()
clf.fit(xtrain, ytrain.ravel())

# Per-class feature counts and per-class sample counts collected by the fit.
counts = clf.feature_count_
y_counts = clf.class_count_

# One bar chart per class: each feature count divided by the class count.
# The title numbers classes from 1, the file name numbers them from 0.
for i, (class_counts, class_total) in enumerate(zip(counts, y_counts)):
    pl.figure()
    pl.bar(np.arange(len(class_counts)), class_counts / class_total)
    pl.title('p(xj=1|y=%d)' % (i + 1))
    pl.savefig('naiveBayesBowDemo_%d.png' % i)
pl.show()
예제 #5
0
#!/usr/bin/env python

import matplotlib.pyplot as pl
import numpy as np
from utils import util


data = util.load_mat('mnistAll')
mnist = data['mnist']
# Each field is unwrapped with [0][0]. Per the original notes the shapes are:
# train_images 28*28*60000, train_labels 60000*1,
# test_images 28*28*10000, test_labels 10000*1.
train_images, train_labels, test_images, test_labels = (
    mnist[field][0][0]
    for field in ('train_images', 'train_labels', 'test_images', 'test_labels')
)

# Figure 1 shows the original digits, figure 2 the same digits with their
# pixels randomly permuted; the seed makes the permutations reproducible.
fig1, fig2 = pl.figure(1), pl.figure(2)
np.random.seed(10)
for i in range(1, 10):
    img = test_images[:, :, i]

    ax1 = fig1.add_subplot(3, 3, i)
    ax1.imshow(img)
    ax1.set_xticks(())
    ax1.set_yticks(())
    ax1.set_title('true class = %s' % test_labels[i])

    # np.random.shuffle only permutes along the first axis, so shuffle a
    # flattened copy of the image and then restore the 2-D shape.
    img_shuffled = img.flatten()
    np.random.shuffle(img_shuffled)
    img_shuffled = img_shuffled.reshape(img.shape)
    ax2 = fig2.add_subplot(3, 3, i)
    ax2.imshow(img_shuffled)
예제 #6
0
#!/usr/bin/env python

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util
from matplotlib.patches import Ellipse

# Pull the 'heightWeightData' table out of the loaded .mat contents.
data = util.load_mat('heightWeight')['heightWeightData']

# First column: sex code. Second and third columns: the plotted coordinates.
sex = data[:, 0]
x, y = data[:, 1], data[:, 2]

# Split the coordinates by sex code (1 -> "male", 2 -> "female").
male_arg, female_arg = (sex == 1), (sex == 2)
x_male, x_female = x[male_arg], x[female_arg]
y_male, y_female = y[male_arg], y[female_arg]

# Plot the first group as blue crosses and the second as red circles, then
# write the figure to disk.
fig = pl.figure()
ax = fig.add_subplot(111)
ax.plot(x_male, y_male, 'bx')
ax.plot(x_female, y_female, 'ro')
pl.savefig('gaussHeightWeight_1.png')


def draw_ell(cov, xy, color):
    u, v = np.linalg.eigh(cov)
    angle = np.arctan2(v[0][1], v[0][0])
    angle = (180 * angle / np.pi)
    # here we time u2 with 5, assume 95% are in this ellipse
예제 #7
0
#!/usr/bin/env python

# Fit linear and quadratic surfaces to data
# Based on code by Romain Thibaux <*****@*****.**>

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util
from mpl_toolkits.mplot3d import Axes3D

# Load the mote data: X holds two input columns, y the plotted response.
data = util.load_mat('moteData/moteData.mat')
X = data['X']
y = data['y']

# Design matrix for the linear fit (presumably X with a column of ones
# added by util.add_ones -- confirm against the helper).
X_pad = util.add_ones(X)

# First pass fits with the linear features, second pass adds squared terms.
for use_quad in (False, True):
    phi = np.column_stack((X_pad, X**2)) if use_quad else X_pad

    fig = pl.figure()
    # Create the 3-D axes through the figure so it is actually attached to it;
    # a bare Axes3D(fig) is no longer added automatically on current
    # matplotlib, which leaves the figure empty.
    ax = fig.add_subplot(111, projection='3d')
    ax.set_zlim(15, 19)
    ax.scatter(X[:, 0], X[:, 1], y)

    # 10 evenly spaced points spanning the observed range of each input.
    xrange = np.linspace(X[:, 0].min(), X[:, 0].max(), 10)
    yrange = np.linspace(X[:, 1].min(), X[:, 1].max(), 10)
    xx, yy = np.meshgrid(xrange, yrange)
    # Column vector of the grid's x coordinates; -1 lets numpy derive the
    # length instead of hard-coding the 10 * 10 grid size.
    flatxx = xx.reshape((-1, 1))
예제 #8
0
#!/usr/bin/env python

import numpy as np
import matplotlib.pyplot as pl
from utils import util
from scipy import ndimage

data = util.load_mat('20news_w100')
# Dense copy of the document matrix, transposed so that indexing along
# axis 0 matches the entries of `newsgroups`.
documents = data['documents']
documents = documents.toarray().T
newsgroups = data['newsgroups'][0]

# Rank rows by their total count and keep the top 1000, in descending order.
row_totals = documents.sum(axis=1)
chosen_docs_arg = np.argsort(row_totals)[-1000:][::-1]
documents = documents[chosen_docs_arg]
newsgroups = newsgroups[chosen_docs_arg]

# Group the selected rows by newsgroup label.
sorted_arg = np.argsort(newsgroups)
documents = documents[sorted_arg]
newsgroups = newsgroups[sorted_arg]

# Widen the image tenfold along the second axis before displaying it.
image = ndimage.zoom(documents, (1, 10))
pl.imshow(image, cmap=pl.cm.gray, interpolation='none')

# Draw a red line between different newsgroups: y becomes the first row
# index of each label that follows the first one.
groups_label = np.unique(newsgroups)
for i in range(len(groups_label) - 1):
    next_label = groups_label[i + 1]
    y = np.where(newsgroups == next_label)[0][0]
예제 #9
0
#!/usr/bin/env python

# Fit logistic model to SAT scores.

import matplotlib.pyplot as plt
import numpy as np
from utils import util
from scipy.special import logit
from sklearn.linear_model import LogisticRegressionCV, LogisticRegression, LinearRegression

data = util.load_mat('sat/sat.mat')
sat = data['sat']

# Column index 3 contains the SAT scores, reshaped into a single-feature
# column; column index 0 is the target and is kept 1-D.
X = sat[:, 3].reshape((-1, 1))
y = sat[:, 0]

# LogisticRegressionCV() was the earlier choice here; a very large C is used
# instead to effectively turn off regularization.
logistic = LogisticRegression(C=1e9)
model = logistic.fit(X, y)

# 100 evenly spaced scores between 400 and 700, as a column vector.
xtest = np.linspace(400, 700, 100).reshape((-1, 1))

# Decision boundary: the score x where a * x + b equals logit(0.5).
a, b = model.coef_, model.intercept_
threshold = (logit(0.5) - b) / a

fig, ax = plt.subplots()
# Axis limits are left at matplotlib's defaults; the earlier fixed window
# was plt.axis([450, 655, -.05, 1.05]).
예제 #10
0
#!/usr/bin/env python

# Fit linear and quadratic surfaces to data
# Based on code by Romain Thibaux <*****@*****.**>

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util
from mpl_toolkits.mplot3d import Axes3D

# Load the mote data: two input columns in X and the plotted response in y.
data = util.load_mat('moteData/moteData.mat')
X = data['X']
y = data['y']

# Base design matrix (presumably X with a column of ones added by
# util.add_ones -- confirm against the helper).
X_pad = util.add_ones(X)

# Run once with the linear features and once with squared terms appended.
for use_quad in (False, True):
  phi = X_pad

  if use_quad:
    phi = np.column_stack((X_pad, X**2))

  fig = pl.figure()
  # Ask the figure for the 3-D axes: a bare Axes3D(fig) is no longer attached
  # to the figure automatically on current matplotlib, so nothing would show.
  ax = fig.add_subplot(111, projection='3d')
  ax.set_zlim(15, 19)
  ax.scatter(X[:, 0], X[:, 1], y)

  # 10-point grids covering the observed range of each input column.
  xrange = np.linspace(X[:, 0].min(), X[:, 0].max(), 10)
  yrange = np.linspace(X[:, 1].min(), X[:, 1].max(), 10)
  xx, yy = np.meshgrid(xrange, yrange)
  # Flatten the grid's x coordinates into a column; -1 infers the length
  # rather than hard-coding 100 for the 10 x 10 grid.
  flatxx = xx.reshape((-1, 1))
예제 #11
0
#!/usr/bin/env python

import matplotlib.pyplot as pl
import numpy as np
import utils.util as util


# Load the MNIST data set and unwrap each field with [0][0].
data = util.load_mat('mnistAll')
mnist = data['mnist']
train_images = mnist['train_images'][0][0]  # 28*28*60000
train_labels = mnist['train_labels'][0][0]  # 60000*1
test_images = mnist['test_images'][0][0]  # 28*28*10000
test_labels = mnist['test_labels'][0][0]  # 10000*1

# Figure 1 shows the original test digits, figure 2 the same digits with
# their pixels randomly permuted; the fixed seed keeps the output repeatable.
fig1 = pl.figure(1)
fig2 = pl.figure(2)
np.random.seed(seed=10)
# Subplot positions on a 3x3 grid are numbered 1..9, so start at 1:
# add_subplot(3, 3, 0) raises ValueError.
for i in range(1, 10):
    img = test_images[:, :, i]
    ax1 = fig1.add_subplot(3, 3, i)
    ax1.imshow(img)
    ax1.set_xticks(())
    ax1.set_yticks(())
    ax1.set_title('true class = %s' % test_labels[i])

    img_shuffled = img.copy()
    # np.random.shuffle only permutes along the first axis, so shuffle the
    # flattened view of the copy to scramble every pixel in place.
    np.random.shuffle(img_shuffled.ravel())
    img_shuffled = img_shuffled.reshape(img.shape)
    ax2 = fig2.add_subplot(3, 3, i)
    ax2.imshow(img_shuffled)