from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import BernoulliRBM
from sklearn.svm import SVC

def run_test(params, model):
    if model == "rf":
        n_tree, mtry = params
        print("# Trees: ", n_tree)
        print("mtry: ", mtry)
        rf = RandomForestClassifier(n_estimators=int(n_tree), verbose=True,
                                    n_jobs=-1, max_features=int(mtry))
        rf.fit(X, y)
        modelPred = rf.predict(X)
    elif model == "svm":
        C, kernel = params
        print("# Cost: ", C)
        print("kernel: ", kernel)
        svmod = SVC(C=int(C), kernel=kernel)
        svmod.fit(X, y)
        modelPred = svmod.predict(X)
    elif model == "knn":
        k = params
        print("# k: ", k)
        knnmod = KNeighborsClassifier(n_neighbors=int(k))
        knnmod.fit(X, y)
        modelPred = knnmod.predict(X)
    elif model == "NeuralNetwork":
        n_components, learning_rate, batch_size, n_iter = params
        print("# n_components: ", n_components)
        print("# learning_rate: ", learning_rate)
        print("# batch_size: ", batch_size)
        print("# n_iter: ", n_iter)
        nnmod = BernoulliRBM(n_components=int(n_components),
                             learning_rate=learning_rate,
                             batch_size=int(batch_size),
                             n_iter=int(n_iter))
        nnmod.fit(X, y)  # y is accepted but ignored: BernoulliRBM is unsupervised
        modelPred = nnmod.score_samples(X)  # pseudo-likelihoods, not class labels
    accuError = AccuracyErrorCalc(y, modelPred)
    return accuError
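# Hypothetical driver for run_test above (not from the original source): the
# function assumes module-level X, y and an AccuracyErrorCalc helper, so
# minimal stand-ins are supplied here.
import numpy as np

X = np.random.RandomState(0).rand(60, 5)
y = np.random.RandomState(1).randint(0, 2, 60)

def AccuracyErrorCalc(y_true, y_pred):
    # simple misclassification-rate stand-in
    return float(np.mean(np.asarray(y_true) != np.asarray(y_pred)))

print(run_test((50, 2), "rf"))       # 50 trees, mtry=2
print(run_test((1.0, "rbf"), "svm"))
print(run_test(3, "knn"))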
def rbm_001():
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5
    train_x_crop_scale = CropScaleImageTransformer(
        training=True,
        result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
        crop_size=crop,
        scaled_size=s,
        n_jobs=-1,
        memmap=True)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    images = images.reshape((images.shape[0], 15 * 15 * 3))

    # rbm needs inputs to be between 0 and 1
    scaler = MinMaxScaler()
    images = scaler.fit_transform(images)

    # Training takes a long time, says 80 seconds per iteration, but seems like longer.
    # And this is only with 256 components.
    rbm = BernoulliRBM(verbose=1)
    rbm.fit(images)

    train_x = rbm.transform(images)
    train_y = classes.train_solutions.data

    # 0.138 CV on 50% of the dataset
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator,
                           {'alpha': 500, 'n_estimators': 500},
                           n_jobs=-1)
    wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
def test_rbm_verbose():
    rbm = BernoulliRBM(n_iter=2, verbose=10)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        rbm.fit(Xdigits)
    finally:
        sys.stdout = old_stdout
def neural_network_classify(train_data, train_label, test_data):
    # nnc = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
    nnc = BernoulliRBM(random_state=0, verbose=True)
    nnc.fit(train_data, ravel(train_label))  # labels are ignored: BernoulliRBM is unsupervised
    # Caution: BernoulliRBM is a transformer and has no predict(); a classifier
    # (e.g. the MLPClassifier above) is needed to actually produce labels.
    test_label = nnc.predict(test_data)  # was `ncc.predict`, an undefined name
    save_result(test_label, 'sklearn_neural_network_classify_Result.csv')
    return test_label
def Bernoulli(X_train, X_test, y_train, y_test):
    mod = BernoulliRBM(random_state=0, verbose=True)
    mod.fit(X_train, y_train)  # y_train is ignored: BernoulliRBM is unsupervised
    print("Done training")
    # BernoulliRBM has no predict()/score(); the closest runnable equivalents
    # are transform() (hidden activations) and score_samples() (pseudo-likelihood).
    bernoulli_labels = mod.transform(X_test)
    print("Done testing")
    bernoulli_score = mod.score_samples(X_test).mean()
    return bernoulli_score, bernoulli_labels
def test_gibbs_smoke():
    """Just check that we don't get NaNs sampling the full digits dataset."""
    rng = np.random.RandomState(42)
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=10, n_iter=20,
                        random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
def test_transform():
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5, n_iter=5, random_state=42)
    rbm1.fit(X)
    Xt1 = rbm1.transform(X)
    Xt2 = rbm1._mean_hiddens(X)
    assert_array_equal(Xt1, Xt2)
def train_rbm(X, n_components=100, n_iter=10):
    X = X.astype(np.float64)
    X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # scale to [0..1]
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = n_iter
    rbm.n_components = n_components
    rbm.fit(X)
    return rbm
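# A minimal usage sketch for train_rbm above (hypothetical random data): any
# non-negative feature matrix works, since the function rescales it to [0, 1].
import numpy as np

rbm = train_rbm(np.random.RandomState(0).rand(200, 30), n_components=16, n_iter=5)
hidden = rbm.transform(np.random.RandomState(1).rand(10, 30))
print(hidden.shape)  # (10, 16)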
def testRBM():
    X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
    print(X)
    model = BernoulliRBM(n_components=2)
    model.fit(X)
    print(dir(model))
    print(model.transform(X))
    print(model.score_samples(X))
    print(model.gibbs)  # note: this prints the bound method itself, not a sample
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset.

    Also check that sampling again will yield different results."""
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20,
                        random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert_true(np.all((X_sampled != X_sampled2).max(axis=1)))
def test_sample_hiddens():
    rng = np.random.RandomState(0)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5, n_iter=5, random_state=42)
    rbm1.fit(X)
    h = rbm1._mean_hiddens(X[0])
    hs = np.mean([rbm1._sample_hiddens(X[0], rng) for i in range(100)], 0)
    assert_almost_equal(h, hs, decimal=1)
def test_fit():
    X = Xdigits.copy()
    rbm = BernoulliRBM(n_components=64, learning_rate=0.1, batch_size=10,
                       n_iter=7, random_state=9)
    rbm.fit(X)
    assert_almost_equal(rbm.score_samples(X).mean(), -21.0, decimal=0)
    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)
class DeepRbmMnistClassifier:

    def __init__(self):
        self.n_components_first = 500
        self.n_components_second = 500
        self.n_components_third = 2000
        self.n_iter_first = 20
        self.n_iter_second = 20
        self.n_iter_third = 20
        self.learning_rate_first = 0.06
        self.learning_rate_second = 0.06
        self.learning_rate_third = 0.06
        self.verbose = True

    def label_to_feature(self, y):
        feature = [0] * 10
        feature[y] = 1
        return feature

    def fit(self, X, y):
        self.rbm_1 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_first,
                                  n_iter=self.n_iter_first,
                                  learning_rate=self.learning_rate_first)
        self.rbm_2 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_second,
                                  n_iter=self.n_iter_second,
                                  learning_rate=self.learning_rate_second)
        self.first_pipeline = Pipeline(steps=[('rbm_1', self.rbm_1),
                                              ('rbm_2', self.rbm_2)])
        self.first_pipeline.fit(X, y)

        # TODO improve. Look at how it is done in classify
        new_features = []
        for example, label in zip(X, y):
            transformed = self.first_pipeline.transform(example)[0]
            new_features.append(np.concatenate((transformed,
                                                self.label_to_feature(label))))

        self.rbm_3 = BernoulliRBM(verbose=self.verbose,
                                  n_components=self.n_components_third,
                                  n_iter=self.n_iter_third,
                                  learning_rate=self.learning_rate_third)
        self.rbm_3.fit(new_features, y)

    def classify(self, X):
        transformed = self.first_pipeline.transform(X)
        transformed = np.concatenate((transformed,
                                      [[0] * 10] * len(transformed)), axis=1)

        # The inverse of rbm_3 to go from hidden layer to visible layer
        rbm_aux = BernoulliRBM()
        rbm_aux.intercept_hidden_ = self.rbm_3.intercept_visible_
        rbm_aux.intercept_visible_ = self.rbm_3.intercept_hidden_
        rbm_aux.components_ = np.transpose(self.rbm_3.components_)
        results = rbm_aux.transform(self.rbm_3.transform(transformed))
        results = results[:, -10:]
        return np.argmax(results, axis=1)
def test_score_samples():
    """Check that the pseudo-likelihood is computed without clipping.

    http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
    """
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert((rbm1.score_samples(X) < -300).all())
def run_auto():
    X = load_data('gender/male')
    X = X.astype(np.float32) / 256
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 2000
    rbm.fit(X)
    cimgs = [comp.reshape(100, 100) for comp in rbm.components_]
    smartshow(cimgs[:12])
    return rbm
def test_gibbs():
    rng = np.random.RandomState(42)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5, n_iter=5, random_state=rng)
    rbm1.fit(X)
    Xt1 = np.mean([rbm1.gibbs(X[0]) for i in range(100)], 0)
    Xt2 = np.mean([rbm1._sample_visibles(rbm1._sample_hiddens(X[0], rng), rng)
                   for i in range(1000)], 0)
    assert_almost_equal(Xt1, Xt2, decimal=1)
def bernoulli_rbm(data, labels):
    print('> running rbm')
    print('visible units: %d' % len(data))
    print('hidden units: %d' % hidden_units)
    print('epochs size: %d' % epochs_size)
    print('-------------')
    rbm = BernoulliRBM(batch_size=32, learning_rate=0.1, n_components=5,
                       n_iter=10, random_state=numpy.random.RandomState(0),
                       verbose=True)  # was numpy.RandomState (the class itself), which doesn't exist
    rbm.fit(data, labels)  # labels are ignored: BernoulliRBM is unsupervised
    training_data = np.array(data)
    # rbm.train(training_data, epochs_size, True)  # removed: BernoulliRBM has
    # no train() method; fit() above already trains the model
def test_fit_gibbs():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    # from the same input
    rng = np.random.RandomState(42)
    X = np.array([[0.], [1.]])
    rbm1 = BernoulliRBM(n_components=2, batch_size=2, n_iter=42,
                        random_state=rng)  # you need that many iters
    rbm1.fit(X)
    assert_almost_equal(rbm1.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm1.gibbs(X), X)
    return rbm1
def test_fit_gibbs_sparse():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
    # the same input even when the input is sparse, and test against non-sparse
    rbm1 = test_fit_gibbs()
    rng = np.random.RandomState(42)
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm2 = BernoulliRBM(n_components=2, batch_size=2, n_iter=42,
                        random_state=rng)
    rbm2.fit(X)
    assert_almost_equal(rbm2.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm2.gibbs(X), X.toarray())
    assert_almost_equal(rbm1.components_, rbm2.components_)
class _BernoulliRBMImpl:

    def __init__(self, **hyperparams):
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def transform(self, X):
        return self._wrapped_model.transform(X)
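# A minimal usage sketch for _BernoulliRBMImpl above, assuming the
# module-level `Op` it wraps is sklearn.neural_network.BernoulliRBM
# (a hypothetical binding; the original snippet does not show it):
import numpy as np
from sklearn.neural_network import BernoulliRBM as Op

X = np.random.RandomState(0).rand(100, 64)  # features scaled to [0, 1)
impl = _BernoulliRBMImpl(n_components=16, n_iter=5, random_state=0)
hidden = impl.fit(X).transform(X)
print(hidden.shape)  # (100, 16)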
def temp(features):
    [featuresNorm, MAX, MIN] = normalizeFeatures(features)
    [X, Y] = listOfFeatures2Matrix(featuresNorm)
    rbm = BernoulliRBM(n_components=10, n_iter=1000, learning_rate=0.01,
                       verbose=False)
    X1 = X[0::2]
    X2 = X[1::2]
    Y1 = Y[0::2]
    Y2 = Y[1::2]
    rbm.fit(X1, Y1)  # Y1 is ignored: BernoulliRBM is unsupervised
    YY = rbm.transform(X1)
    for i in range(10):
        plt.plot(YY[i, :], 'r')
    for i in range(10):
        plt.plot(YY[i + 10, :], 'g')
    for i in range(10):
        plt.plot(YY[i + 20, :], 'b')
    plt.show()
def update_feat_with_RBMs(s_data, greedy_pre_train=1):
    data = scale(s_data.get_data())
    print(np.min(data))
    print(np.max(data))
    # Fit and transform data
    for i in range(greedy_pre_train):
        # Initialize the RBM
        rbm = BernoulliRBM(n_components=90, n_iter=50, learning_rate=0.01,
                           verbose=True)
        rbm.fit(data)
        s_data.update_features(rbm.transform)
        data = s_data.get_data()
def _RBM(self, X, y):
    from sklearn.neural_network import BernoulliRBM
    # RBM creation with k_features components, used as a feature-extraction
    # method (the comment originally said "PCA", but this is an RBM). Used here
    # (after sampling) because we are creating a universal model and not a
    # this_dataset-specific one.
    neural_network = BernoulliRBM(n_components=self.k_features)
    neural_network.fit(X, y)  # y is ignored: BernoulliRBM is unsupervised
    X = neural_network.transform(X)
    self.feature_reduction_method = neural_network
    return X
def pretraining(self):
    input_layer = self.x_train
    for i in range(len(self.hidden_layer)):
        print("DBN Layer {0} Pre-training".format(i + 1))
        rbm = BernoulliRBM(n_components=self.hidden_layer[i],
                           learning_rate=self.learning_rate_rbm,
                           batch_size=self.batch_size_rbm,
                           n_iter=self.n_epochs_rbm,
                           verbose=self.verbose_rbm,
                           random_state=self.verbose_rbm)  # likely a typo: a seed was probably intended here
        rbm.fit(input_layer)
        # size of weight matrix is [input_layer, hidden_layer]
        self.weight_rbm.append(rbm.components_.T)
        self.bias_rbm.append(rbm.intercept_hidden_)
        input_layer = rbm.transform(input_layer)
    print('Pre-training finish.')
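# Shape sanity check for the weight-handoff convention used in the DBN
# pre-training above: BernoulliRBM stores its weights in components_ with
# shape (n_components, n_features), so the transpose matches a Dense kernel
# of shape (input_dim, units). A standalone sketch with hypothetical sizes:
import numpy as np
from sklearn.neural_network import BernoulliRBM

X = np.random.RandomState(0).rand(32, 20)
rbm = BernoulliRBM(n_components=8, n_iter=2, random_state=0).fit(X)
assert rbm.components_.shape == (8, 20)
assert rbm.components_.T.shape == (20, 8)   # Dense kernel: (input_dim, units)
assert rbm.intercept_hidden_.shape == (8,)  # Dense bias: (units,)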
def train_ca_cd(type, X_train, y_train, X_test, y_test):
    input_layer = X_train
    hidden_layer = [250, 500, 200]
    weight_rbm = []
    bias_rbm = []
    for i in range(len(hidden_layer)):
        print("DBN Layer {0} Pre-training".format(i + 1))
        rbm = BernoulliRBM(n_components=hidden_layer[i],
                           learning_rate=0.0005,
                           batch_size=512,
                           n_iter=200,
                           verbose=2,
                           random_state=1)
        rbm.fit(input_layer)
        # size of weight matrix is [input_layer, hidden_layer]
        weight_rbm.append(rbm.components_.T)
        bias_rbm.append(rbm.intercept_hidden_)
        input_layer = rbm.transform(input_layer)
    print('Pre-training finish.', np.shape(weight_rbm[0]), np.shape(bias_rbm[0]))

    test_rms = 0
    result = []
    model = Sequential()
    print('Fine-tuning start.')
    for i in range(0, len(hidden_layer)):
        print('i:', i)
        if i == 0:
            model.add(Dense(hidden_layer[i], activation='sigmoid',
                            input_dim=np.shape(X_train)[1]))
        elif i >= 1:
            model.add(Dense(hidden_layer[i], activation='sigmoid'))
        else:
            pass
        layer = model.layers[i]
        layer.set_weights([weight_rbm[i], bias_rbm[i]])
    # model.add(Dense(np.shape(yTrain)[1], activation='linear'))
    model.add(Dense(1, activation='linear',
                    kernel_regularizer=regularizers.l2(0.01)))
    # sgd = SGD(lr=0.005, decay=0)
    model.compile(loss='mse', optimizer="rmsprop")  # sgd
    model.fit(X_train, y_train, batch_size=150, epochs=100, verbose=5)
    model.save('../model/dwt_dbn_' + type + '_100.h5')
    print('Fine-tuning finish.')
    return model
def estimate_n_components():
    X = load_data('gender/male')
    X = X.astype(np.float32) / 256
    n_comp_list = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]
    scores = []
    for n_comps in n_comp_list:
        rbm = BernoulliRBM(random_state=0, verbose=True)
        rbm.learning_rate = 0.06
        rbm.n_iter = 50
        rbm.n_components = n_comps  # was hard-coded to 100, which defeated the scan
        rbm.fit(X)
        score = rbm.score_samples(X).mean()
        scores.append(score)
    plt.figure()
    plt.plot(n_comp_list, scores)
    plt.show()
    return n_comp_list, scores
def update_feat_with_DBN(s_data, output=400, rbm_num=2):
    # out_dims = np.arange(100, output + 100, int(output / rbm_num))
    out_dims = [60, 30]
    print(out_dims)
    data = scale(s_data.get_data())
    print(np.min(data))
    print(np.max(data))
    for i in range(rbm_num):
        # Initialize the RBM
        rbm = BernoulliRBM(n_components=out_dims[i], n_iter=50,
                           learning_rate=0.01, verbose=True)
        rbm.fit(data)
        s_data.update_features(rbm.transform)
        data = s_data.get_data()
def test_sparse_and_verbose():
    # Make sure RBM works with sparse input when verbose=True
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm = BernoulliRBM(n_components=2, batch_size=2, n_iter=1, random_state=42,
                       verbose=True)
    try:
        rbm.fit(X)
        s = sys.stdout.getvalue()
        # make sure output is sound
        assert re.match(r"\[BernoulliRBM\] Iteration 1,"
                        r" pseudo-likelihood = -?(\d)+(\.\d+)?,"
                        r" time = (\d|\.)+s", s)
    finally:
        sys.stdout = old_stdout
def pretrain(self, save=True):
    visual_layer = self.data
    for i in range(len(self.hidden_sizes)):
        print("[DBN] Layer {} Pre-Training".format(i + 1))
        rbm = BernoulliRBM(n_components=self.hidden_sizes[i],
                           n_iter=self.rbm_iters[i],
                           learning_rate=self.rbm_learning_rate[i],
                           verbose=True, batch_size=128)
        rbm.fit(visual_layer)
        self.rbm_weights.append(rbm.components_)
        self.rbm_biases.append(rbm.intercept_hidden_)
        self.rbm_h_act.append(rbm.transform(visual_layer))
        visual_layer = self.rbm_h_act[-1]
def test_score_samples():
    """Test score_samples (pseudo-likelihood) method."""
    # Assert that pseudo-likelihood is computed without clipping.
    # http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)
def trainRBM_SVM(features, Cparam, nComponents):
    [X, Y] = listOfFeatures2Matrix(features)
    rbm = BernoulliRBM(n_components=nComponents, n_iter=30, learning_rate=0.2,
                       verbose=True)
    rbm.fit(X, Y)  # Y is ignored: BernoulliRBM is unsupervised
    newX = rbm.transform(X)
    # colors = ["r", "g", "b"]
    # for i in range(1, Y.shape[0], 5):
    #     plt.plot(newX[i, :], colors[int(Y[i])])
    # plt.show()
    classifier = {}
    classifier["rbm"] = rbm
    svm = sklearn.svm.SVC(C=Cparam, kernel='linear', probability=True)
    svm.fit(newX, Y)
    classifier["svm"] = svm
    return classifier
def add_Brbm(Visible, components, rs, learning_rate, verbose=None, n_iter=None):
    # The optional verbose/n_iter parameters were originally accepted but
    # never used (verbose=False and n_iter=50 were hard-coded); they are wired
    # through here with the original values as defaults.
    rbm = BernoulliRBM(n_components=components, random_state=rs,
                       learning_rate=learning_rate,
                       verbose=False if verbose is None else verbose,
                       n_iter=50 if n_iter is None else n_iter)
    rbm.fit(Visible)
    rbm_data = {
        'coefs': np.transpose(np.array(rbm.components_)),
        'bias': np.array(rbm.intercept_hidden_),
        'hidden': rbm.transform(Visible)
    }
    return rbm_data
def RBM():
    filename = "../data/smaller.dta"
    raw_data = open(filename, 'rt')
    data = np.loadtxt(raw_data, delimiter=" ")
    X = data[:, :3]
    Y = data[:, 3]
    print(X)
    print(Y)
    print("training on RBM")
    rbm = BernoulliRBM(random_state=0, verbose=True)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 100
    rbm.fit(X, Y)  # Y is ignored: BernoulliRBM is unsupervised
    predictions = rbm.transform(X)  # hidden-unit activations, not class predictions
    params = rbm.get_params()
    print("predictions = ", predictions)
    print("rbm = ", rbm)
    print("params = ", params)
def CalculateObjectFunction(chrom):
    W_Comb_1 = int(chrom[0])
    W_Comb_2 = int(chrom[1])
    W_Comb_3 = int(chrom[2])
    Decoded_X4_W = Decode_X4(chrom[3:])
    kf = RepeatedKFold(n_splits=5, n_repeats=2)
    AC = []
    for train, test in kf.split(X):
        model = BernoulliRBM(n_components=W_Comb_1,
                             learning_rate=Decoded_X4_W,
                             batch_size=W_Comb_2,
                             n_iter=W_Comb_3,
                             verbose=1,
                             random_state=0)
        model.fit(X[train])
        AC.append(model.score_samples(X[test]).mean())
    return statistics.mean(AC)
def test_fit_transform():
    """Check proper implementation of fit_transform"""
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5, n_iter=5, random_state=42)
    rbm2 = clone(rbm1)
    Xt1 = rbm1.fit(X).transform(X)
    Xt2 = rbm2.fit_transform(X)
    assert_array_equal(Xt1, Xt2)
def test_score_samples():
    # Test score_samples (pseudo-likelihood) method.
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert (rbm1.score_samples(X) < -300).all()

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under="ignore"):
        rbm1.score_samples([np.arange(1000) * 100])
def pretrain(self):
    self.weight_rbm = []
    self.bias_rbm = []
    x_train = self.x_train
    y_train = self.y_train
    hidden_layer_structure = self.get_hidden_layer_structure()
    input_layer = x_train
    for i in range(len(hidden_layer_structure)):
        rbm = BernoulliRBM(n_components=hidden_layer_structure[i],
                           learning_rate=self.learning_rate_rbm,
                           batch_size=self.batch_size_rbm,
                           n_iter=self.n_epochs_rbm,
                           verbose=1,
                           random_state=self.random_seed)
        rbm.fit(input_layer)
        self.weight_rbm.append(rbm.components_.T)
        self.bias_rbm.append(rbm.intercept_hidden_)
        input_layer = rbm.transform(input_layer)
    return
def test_score_samples():
    """Test score_samples (pseudo-likelihood) method."""
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under="ignore"):
        rbm1.score_samples(np.arange(1000) * 100)
def train(image_matrix, images):
    X = np.asarray(image_matrix, 'float32')
    Y = np.array(X.shape)
    X = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001)  # scale to [0, 1]
    rbm = BernoulliRBM(random_state=1, verbose=True)
    rbm.learning_rate = 0.09
    rbm.n_iter = 1
    rbm.n_components = 16
    rbm.batch_size = 2
    y_new = np.zeros(X.shape)
    for i in range(len(X)):
        # fits one sample at a time; modern sklearn expects a 2-D array here
        x_new = rbm.fit(X[i])
        y_new[i] = x_new.components_
    global model
    model = {
        'matrix': y_new,
        'images': images
    }
def train_rbm(x, nh):
    rbm = BernoulliRBM(n_components=nh, verbose=True)
    rbm.fit(x)
    xh = transform(x, rbm)
    return xh, rbm
# import the necessary packages
from sklearn.neural_network import BernoulliRBM
import matplotlib.pyplot as plt
from sklearn import datasets

# load the scikit-learn digits dataset (8x8 images, a small MNIST stand-in)
# and apply min/max scaling to bring the pixel intensity values into the
# range [0, 1]
digits = datasets.load_digits()
data = digits.data.astype("float")
data = (data - data.min(axis=0)) / (data.max(axis=0) + 1e-5)

# train the Restricted Boltzmann Machine on the data
rbm = BernoulliRBM(n_components=64, learning_rate=0.05, n_iter=20,
                   random_state=42, verbose=True)
rbm.fit(data)

# initialize the plot
plt.figure()
plt.suptitle("64 MNIST components extracted by RBM")

# loop over the number of components generated by the RBM
for (i, comp) in enumerate(rbm.components_):
    # construct a sub-plot for the component and display the image
    plt.subplot(8, 8, i + 1)
    plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r,
               interpolation="nearest")
    plt.xticks([])
    plt.yticks([])

# show the output plot
plt.show()
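# A natural follow-up to the walkthrough above (not part of the original
# snippet): chain the RBM's hidden features into a classifier with a
# scikit-learn Pipeline, the standard way these learned components are used.
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import BernoulliRBM
from sklearn.pipeline import Pipeline

digits = datasets.load_digits()
data = digits.data.astype("float")
data = (data - data.min(axis=0)) / (data.max(axis=0) + 1e-5)

clf = Pipeline(steps=[
    ("rbm", BernoulliRBM(n_components=64, learning_rate=0.05,
                         n_iter=20, random_state=42)),
    ("logistic", LogisticRegression(max_iter=1000)),
])
clf.fit(data, digits.target)
print("train accuracy: %.3f" % clf.score(data, digits.target))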
    for data_vector in csv_reader:
        data_matrix += [data_vector]
    return np.array(data_matrix)

train_input = readData('bindigit_trn')
train_target = readData('targetdigit_trn')
test_input = readData('bindigit_tst')
test_target = readData('targetdigit_tst')

# create and train model
rbm = BernoulliRBM(n_components=50, learning_rate=.2, batch_size=100, n_iter=20)
rbm.fit(train_input, y=test_input)  # y is ignored: BernoulliRBM is unsupervised

original = []
for image in range(len(first_number_index)):
    original += [train_input[first_number_index[image]]]
reconstructed_boolean = rbm.gibbs(original)
reconstructed = np.zeros(np.shape(reconstructed_boolean))
for i in range(np.shape(reconstructed_boolean)[0]):
    for j in range(np.shape(reconstructed_boolean)[1]):
        if reconstructed_boolean[i][j]:
            reconstructed[i][j] = 1
        else:
            reconstructed[i][j] = 0
for idx in range(10):
    plt.imsave('images/rbm_org_' + str(idx) + '.png',
def learn(self, Xtrain, Ytrain):
    model = BernoulliRBM(n_components=self.hiddenLayers,
                         learning_rate=self.learning_rate)
    self.model = model.fit(Xtrain, Ytrain)  # Ytrain is ignored: BernoulliRBM is unsupervised
    return self.model
# ====== gmm ====== #
gmm = GaussianMixture(n_components=NUM_DIM, max_iter=100,
                      covariance_type='full', random_state=SEED)
gmm.fit(X_train)
X_train_gmm = gmm._estimate_weighted_log_prob(X_train)
X_score_gmm = gmm._estimate_weighted_log_prob(X_score)
# ====== rbm ====== #
rbm = BernoulliRBM(n_components=NUM_DIM, batch_size=8, learning_rate=0.0008,
                   n_iter=8, verbose=2, random_state=SEED)
rbm.fit(X_train)
X_train_rbm = rbm.transform(X_train)
X_score_rbm = rbm.transform(X_score)
# ===========================================================================
# Deep Learning
# ===========================================================================
# ===========================================================================
# Visualize
# ===========================================================================
def plot(train, score, title, applying_pca=False):
    if applying_pca:
        pca = PCA(n_components=NUM_DIM)
        pca.fit(train)
def train(data, nComp, nIter):
    X = np.array(data)  # was `trainData`, an undefined name; the parameter is `data`
    model = BernoulliRBM(n_components=nComp, n_iter=nIter, verbose=1)
    model.fit(X)
    return model
readCsvData()
# print Train_X[0]
# print final
Test_X = Train_X[:15]
Test_Y = Train_X[15:]
print(len(Test_X))
# print Train_X

# Remove all stopwords since all characters are taken
# vectorizer = TfidfVectorizer(stop_words=None)
# Train_X = vectorizer.fit_transform(documents)
X = np.array(Test_X)
# print type(X)
# print Train_X

"""num_Of_clusters = 3
model = KMeans(n_clusters=num_Of_clusters, init='random', max_iter=1000, n_init=2)
model.fit_transform(X)
labels = model.labels_
order_centroids = model.cluster_centers_.argsort()[:, ::-1]"""

"""model = GMM(n_components=2)
model.fit(X)"""

model = BernoulliRBM(n_components=2)
model.fit(X)

# Predict the test label for new data.
testLabels = model.transform(Test_Y)  # transform() returns hidden activations, not labels
print(testLabels)
train_X, train_y = train[:, :-end], train[:, -end]
print(train_X.shape, train_y.shape)
test_X, test_y = test[:, :-end], test[:, -end]
# flatten input to 2D [samples, timesteps * features]
train_X = train_X.reshape((train_X.shape[0], before * 77))
test_X = test_X.reshape((test_X.shape[0], before * 77))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)

# dbn
input_layer = train_X
hidden_layer = [250, 500, 200]
weight_rbm = []
bias_rbm = []
for i in range(len(hidden_layer)):
    print("DBN Layer {0} Pre-training".format(i + 1))
    rbm = BernoulliRBM(n_components=hidden_layer[i], learning_rate=0.0005,
                       batch_size=512, n_iter=200, verbose=2, random_state=1)
    rbm.fit(input_layer)
    # size of weight matrix is [input_layer, hidden_layer]
    weight_rbm.append(rbm.components_.T)
    bias_rbm.append(rbm.intercept_hidden_)
    input_layer = rbm.transform(input_layer)
print('Pre-training finish.', np.shape(weight_rbm[0]), np.shape(bias_rbm[0]))

test_rms = 0
result = []
model = Sequential()
print('Fine-tuning start.')
for i in range(0, len(hidden_layer)):
    print('i:', i)
    if i == 0:
        model.add(Dense(hidden_layer[i], activation='sigmoid',
                        input_dim=np.shape(train_X)[1]))
    elif i >= 1:
        model.add(Dense(hidden_layer[i], activation='sigmoid'))