def Highlighting_Test_Data_Points():
    """Plot linear-SVM decision regions for a CSV dataset and highlight the held-out rows.

    Reads ``2clstrain1200.csv`` (no header row), takes the first two columns
    as features and the third column as the class label, shuffles both arrays
    with a fixed seed, fits a linear SVC on the first 700 rows and passes the
    remaining rows to ``plot_decision_regions`` as ``X_highlight``.

    Relies on the module-level ``pd`` (pandas) import. Takes no arguments and
    returns ``None``; the figure is shown with ``plt.show()``.
    """
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    from sklearn.svm import SVC

    # Loading the example data
    data = pd.read_csv('2clstrain1200.csv', header=None)
    # ``int`` is used instead of ``np.integer``: the latter is an abstract
    # scalar type and converting it to a dtype is deprecated in NumPy.
    X = data.iloc[:, 0:2].values.astype(int)
    y = data.iloc[:, 2].values.astype(int)

    # Same seeded permutation for features and labels
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
    X_train, y_train = X[:700], y[:700]
    X_test = X[700:]

    # Training a classifier
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Plotting decision regions over all rows, with the held-out rows highlighted
    plot_decision_regions(X, y, clf=svm, legend=2, X_highlight=X_test)

    # Adding axes annotations
    # NOTE(review): the title says "Iris" but the data comes from the CSV above - confirm.
    plt.xlabel('')
    plt.ylabel('')
    plt.title('SVM on Iris')
    plt.show()
def main(self):
    """Run the whole workflow: load, split, impute, fit, plot, persist and reload.

    Prints the module docstring, builds a ``sat`` helper, loads ``gpa.csv``,
    splits it, passes each split through ``missing_val``, fits the classifier
    returned by ``classifier()``, plots the training and testing sets, saves
    and reloads the classifier via ``sat_score.pkl`` and finally draws
    learning curves against a seeded shuffle of the full data.
    """
    print(__doc__)

    # helper instance that provides every step used below
    helper = sat()

    # features and output
    x, y = helper.load_data("gpa.csv")

    # train/test partition (the scaling step is currently disabled)
    x_train, x_test, y_train, y_test = helper.split(x, y)

    # missing value imputation, applied to each split in turn
    x_train, x_test, y_train, y_test = [
        helper.missing_val(part)
        for part in (x_train, x_test, y_train, y_test)
    ]

    # build the classifier and fit it on the training split
    clf = helper.classifier()
    clf.fit(x_train, y_train)

    # plot the training set, then the testing set
    for features, target, title in (
        (x_train, y_train, "sat score (Training set)"),
        (x_test, y_test, "sat score (Testing set)"),
    ):
        helper.plot(clf, features, target, "orange", "blue", title,
                    "GPA", "SAT SCORE")

    # persist the classifier, then read it back
    helper.save_classifier(clf, "sat_score.pkl", "wb")
    clf = helper.load_classifier("sat_score.pkl", "rb")

    # learning curves: training split vs. a seeded shuffle of the full data
    x, y = shuffle_arrays_unison(arrays=[x, y], random_seed=5)
    plot_learning_curves(x_train, y_train, x, y, clf)
    plt.show()
def test_shuffle_arrays_unison():
    """Check that a seeded shuffle reorders both arrays with the same permutation."""
    X1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y1 = np.array([1, 2, 3])
    X2, y2 = shuffle_arrays_unison(arrays=[X1, y1], random_seed=3)

    # Compare element-wise. The earlier ``a.all() == b.all()`` form collapsed
    # each array to a single bool first, so it passed for any zero-free arrays
    # regardless of their order.
    np.testing.assert_array_equal(
        X2, np.array([[4, 5, 6], [1, 2, 3], [7, 8, 9]]))
    np.testing.assert_array_equal(y2, np.array([2, 1, 3]))
def test_shuffle_arrays_unison():
    """Verify the seeded output of shuffle_arrays_unison for a feature/label pair."""
    X1 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    y1 = np.array([1, 2, 3])
    X2, y2 = shuffle_arrays_unison(arrays=[X1, y1], random_seed=3)

    expected_X = np.array([[4, 5, 6], [1, 2, 3], [7, 8, 9]])
    expected_y = np.array([2, 1, 3])

    # ``x.all() == y.all()`` only compared two scalar booleans and could not
    # detect a wrong ordering; compare the arrays element by element instead.
    np.testing.assert_array_equal(X2, expected_X)
    np.testing.assert_array_equal(y2, expected_y)
def _load_batches(input_pattern, label_pattern, file_num, n_batches):
    """Load one input/label file pair, shuffle it in unison and split it into batches.

    Both glob results are sorted so that index ``file_num`` refers to the same
    position in the input list and in the label list.
    """
    input_files = sorted(glob(input_pattern))
    label_files = sorted(glob(label_pattern))
    x = np.load(input_files[file_num])
    y = np.load(label_files[file_num])
    x, y = shuffle_arrays_unison(arrays=[x, y])
    return np.array(np.split(x, n_batches)), np.array(np.split(y, n_batches))


def batch_generator(file_siize):
    """Return one training file and one random validation file, each split into batches.

    Parameters
    ----------
    file_siize : int
        Number of equal sections each loaded array is split into. Must be a
        positive divisor of 1000 (presumably the number of samples stored per
        file - confirm). The misspelled name is kept so existing callers
        continue to work.

    Returns
    -------
    tuple
        ``(x_train, y_train, x_valid, y_valid)``, each an array holding the
        ``file_siize`` sections of the corresponding shuffled file.

    Raises
    ------
    SystemExit
        If the size is not a positive divisor of 1000 (after printing
        ``'wrong size!'``).

    Notes
    -----
    Advances the module-level ``batch_count`` by one on every call; the
    training file index cycles through 0..10 and the validation file index is
    drawn from 0..9.
    """
    global batch_count

    # The body used to read an undefined ``file_size`` (NameError); bind it
    # from the actual parameter.
    file_size = file_siize

    if file_size <= 0 or 1000 % file_size != 0:
        print('wrong size!')
        # ``os._exit()`` without a status argument raised TypeError; leave
        # through the regular interpreter exit path instead.
        raise SystemExit(1)

    file_num = int(batch_count % 11)
    batch_count += 1
    x_train, y_train = _load_batches(
        'data/radarTrend.train.*.input.npy',
        'data/radarTrend.train.*.label.npy',
        file_num, file_size)

    # Validation lists are now sorted like the training lists; unsorted glob
    # results could pair an input file with the label file of another shard.
    file_num = np.random.choice(10)
    x_valid, y_valid = _load_batches(
        'data/radarTrend.test.*.input.npy',
        'data/radarTrend.test.*label.npy',
        file_num, file_size)

    return x_train, y_train, x_valid, y_valid
def Customizing_the_plotting_style():
    """Plot linear-SVM decision regions for a CSV dataset with custom plot styling.

    Reads ``2clstrain1200.csv`` (no header row), takes the first two columns
    as features and the third column as the class label, shuffles both arrays
    with a fixed seed and fits a linear SVC on the first 700 rows. The
    remaining rows are passed as ``X_highlight``, and three keyword
    dictionaries are forwarded to ``plot_decision_regions`` to style the
    scatter points, the filled contours and the highlighted points.

    Relies on the module-level ``pd`` (pandas) import. Takes no arguments and
    returns ``None``; the figure is shown with ``plt.show()``.
    """
    from mlxtend.plotting import plot_decision_regions
    from mlxtend.preprocessing import shuffle_arrays_unison
    import matplotlib.pyplot as plt
    from sklearn.svm import SVC

    # Loading the example data
    data = pd.read_csv('2clstrain1200.csv', header=None)
    # ``int`` is used instead of ``np.integer``: the latter is an abstract
    # scalar type and converting it to a dtype is deprecated in NumPy.
    X = data.iloc[:, 0:2].values.astype(int)
    y = data.iloc[:, 2].values.astype(int)

    # Same seeded permutation for features and labels
    X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)
    X_train, y_train = X[:700], y[:700]
    X_test = X[700:]

    # Training a classifier
    svm = SVC(C=0.5, kernel='linear')
    svm.fit(X_train, y_train)

    # Keyword arguments forwarded to the underlying plotting functions
    scatter_kwargs = {'s': 120, 'edgecolor': None, 'alpha': 0.7}
    contourf_kwargs = {'alpha': 0.2}
    scatter_highlight_kwargs = {'s': 120, 'label': 'Test data', 'alpha': 0.7}

    # Plotting decision regions over all rows, with the held-out rows highlighted
    plot_decision_regions(X, y, clf=svm, legend=2,
                          X_highlight=X_test,
                          scatter_kwargs=scatter_kwargs,
                          contourf_kwargs=contourf_kwargs,
                          scatter_highlight_kwargs=scatter_highlight_kwargs)

    # Adding axes annotations
    # NOTE(review): the title says "Iris" but the data comes from the CSV above - confirm.
    plt.xlabel('')
    plt.ylabel('')
    plt.title('SVM on Iris')
    plt.show()
# dropout function def dropout(a, prob): shape = a.shape[0] vec = np.random.choice([0,1], size = (shape,1), p = [prob, 1-prob]) return vec * a #load dataset iris = datasets.load_iris() data = iris.data actual = iris.target # scramble arrays data = inputn(data) actual = inputn(actual) # shuffle the arrays the same data, actual = shuffle_arrays_unison(arrays=[data, actual], random_seed=3) # initialize weights, divide by sqrt(4), to prevent explosive gradients syn0 = np.random.randn(4,4) / 2 syn1 = np.random.randn(3,4) /2 bias0 = np.random.randn(4,1) /2 bias1 = np.random.randn(3,1) /2 # stores array of xarr = [] yarr = [] rate_syn = 0.1 rate_bias = 0.1 for yo in range(200):
# Load the dataset - This dataset has 60 columns plus the decision variable 'R' and 'M'. It has a total of 208 rows.
X = np.loadtxt('sonar.txt', usecols=range(0, 60), delimiter=',')
labels = np.genfromtxt('sonar.txt', delimiter=',', usecols=-1, dtype=str)

# Replace 'R' with 1 and every other label with 0. The array has a string
# dtype, so the values are stored as '1' / '0' until the integer cast below.
# The mask is computed once, before any element is overwritten.
is_rock = labels == 'R'
labels[is_rock] = '1'
labels[~is_rock] = '0'
y = np.array(labels)

# Shuffle the data so that the test data towards the end does not end up with only 0's
from mlxtend.preprocessing import shuffle_arrays_unison
X, y = shuffle_arrays_unison(arrays=[X, y], random_seed=3)

# Convert y from an array of strings to an array of integers.
# The ``np.int`` alias was removed from NumPy; the builtin ``int`` is the
# documented replacement and yields the same dtype.
y = y.astype(int)

# Initialize the MLP class and fit the multi-layer perceptron model on X and y data. X and y is split implicitly inside the
# MLP class into training and test data
# Then predict the y values for X_test and store it in y_predicted
# The default number of iterations is 100, but can be changed by passing a value to n_iters. Similarly, learning rate is
# 0.01, but can be changed. In each iteration, we are printing the accuracy metrics for the predicted values of the training
# data.
mlp = MLP()
mlp.fit(X, y)
y_predicted = mlp.predict()
# An accuracy of 70% has been gained on test data after 100 epochs.
# mnist.py from mlxtend.data import mnist_data from mlxtend.classifier import MultiLayerPerceptron as MLP from mlxtend.preprocessing import shuffle_arrays_unison X, y = mnist_data() X, y = shuffle_arrays_unison((X, y), random_seed=1) X_train, y_train = X[:500], y[:500] X_test, y_test = X[500:], y[500:] import matplotlib.pyplot as plt def plot_digit(X, y, idx): img = X[idx].reshape(28, 28) plt.imshow(img, cmap='Greys', interpolation='nearest') plt.title('true label: %d' % y[idx]) plt.show() plot_digit(X, y, 3500) from mlxtend.preprocessing import standardize X_train_std, params = standardize(X_train, columns=range(X_train.shape[1]), return_params=True) X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params) nn1 = MLP(hidden_layers=[150], l2=0.00, l1=0.0, epochs=100,
# In[47]:

# Cost recorded by nn2, one point per epoch
plt.plot(range(len(nn2.cost_)), nn2.cost_)
plt.xlabel('Epochs')
plt.ylabel('Cost')
plt.show()

# ## Example 2 - Classifying Handwritten Digits from a 10% MNIST Subset

# In[53]:

from mlxtend.data import mnist_data
from mlxtend.preprocessing import shuffle_arrays_unison

# Load, shuffle features and labels together, then cut at row 500
X, y = mnist_data()
X, y = shuffle_arrays_unison(arrays=(X, y), random_seed=1)
X_train, X_test = X[:500], X[500:]
y_train, y_test = y[:500], y[500:]

# Visualize a sample from the MNIST dataset to check if it was loaded correctly:

# In[55]:

import matplotlib.pyplot as plt


def plot_digit(X, y, idx):
    """Display sample ``idx`` of ``X`` as a 28x28 image with its label as the title."""
    plt.imshow(X[idx].reshape(28, 28),
               cmap='Greys',
               interpolation='nearest')
    plt.title('true label: %d' % y[idx])
    plt.show()
# Learning curves for a k-nearest-neighbours classifier on the iris data.
# TODO(review): the original "TODO error" marker did not record what failed;
# the import location of plot_learning_curves is one likely cause - confirm.
try:
    # Plotting helpers are provided by mlxtend.plotting.
    from mlxtend.plotting import plot_learning_curves
except ImportError:
    # Fallback for old mlxtend releases that exposed it from mlxtend.evaluate.
    from mlxtend.evaluate import plot_learning_curves
import matplotlib.pyplot as plt
from mlxtend.data import iris_data
from mlxtend.preprocessing import shuffle_arrays_unison
from sklearn.neighbors import KNeighborsClassifier

# Loading some example data; no seed is passed, so the split differs per run
X, y = iris_data()
X, y = shuffle_arrays_unison(arrays=[X, y])
X_train, X_test = X[:100], X[100:]
y_train, y_test = y[:100], y[100:]

clf = KNeighborsClassifier(n_neighbors=5)

plot_learning_curves(X_train, y_train, X_test, y_test, clf)
plt.show()
# Restore the two bias arrays from the already-open handles b0 and b1.
# NOTE(review): b0/b1 are opened outside this excerpt; pickle.load must only
# be used on trusted files.
bias0 = pickle.load(b0)
bias1 = pickle.load(b1)

# load MNIST dataset
# NOTE(review): fetch_mldata is no longer available in recent scikit-learn
# releases (fetch_openml is the documented replacement) - confirm the
# installed version.
mnist = fetch_mldata("MNIST original")

# learning rates for weights and bias
rate_syn = .0001
rate_bias = .0001

# data set: features and targets
xarr = mnist.data
yarr = mnist.target

# shuffle both arrays with the same seeded permutation
xarr, yarr = shuffle_arrays_unison(arrays=[xarr, yarr], random_seed=4)

# only use the first 1000 samples for training
xarr = xarr[:1000]
yarr = yarr[:1000]

# graphing variables
xbar = []
ybar = []
correct = 0
loss = 0

# training loop: 10 epochs over the 1000 retained samples
for epoch in range(10):
    for image in range(1000):
        # convolution operation with max pooling
        # NOTE(review): ``input`` shadows the builtin of the same name.
        input = layer.convert_to_2d_image(xarr[image])
plt.show()

# Cost recorded by nn2, one point per epoch
plt.plot(range(len(nn2.cost_)), nn2.cost_)
plt.ylabel('Cost')
plt.xlabel('Epochs')
plt.show()

# Classify handwritten digits from a 10% mnist subset
from mlxtend.data import mnist_data
from mlxtend.preprocessing import shuffle_arrays_unison, standardize

# Load the data, shuffle features and labels with one seeded permutation,
# then use the first 500 rows for training and the rest for testing.
X, y = mnist_data()
X, y = shuffle_arrays_unison((X, y), random_seed=1)
X_train, y_train = X[:500], y[:500]
X_test, y_test = X[500:], y[500:]

def plot_digit(X, y, idx):
    """Show row ``idx`` of ``X`` as a 28x28 greyscale image titled with ``y[idx]``."""
    img = X[idx].reshape(28, 28) # 784 => 28 * 28
    plt.imshow(img, cmap='Greys', interpolation='nearest')
    plt.title('true label: %d' % y[idx])
    plt.show()

# Visual sanity check on one sample of the shuffled data
plot_digit(X, y, 3500)

# Standardize every column; the parameters returned for the training split
# are passed back in when standardizing the test split.
X_train_std, params = standardize(X_train, columns=range(X_train.shape[1]), return_params=True)
X_test_std = standardize(X_test, columns=range(X_test.shape[1]), params=params)

# NOTE(review): the constructor call below continues beyond this excerpt.
nn1 = MLP(hidden_layers=[150],