def train(X_train, y_train, batch_size, n_batches):
    """Train an MLP on random mini-batches and return the best model seen.

    Draws ``n_batches`` random batches of size ``batch_size`` (with
    replacement) from the training set, updating the model incrementally
    via ``partial_fit``.  After every batch the model is scored on the
    module-level validation set ``X_val``/``y_val``; the lowest-loss model
    is pickled to disk and reloaded at the end.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
    y_train : ndarray of shape (n_samples,)
    batch_size : int
        Number of samples drawn per update.
    n_batches : int
        Number of ``partial_fit`` updates to perform.

    Returns
    -------
    The model with the lowest validation loss observed during training.
    """
    model = mlp(hidden_layer_sizes=(1024, 2048, 1024, 512, 256, 256, 128, 64),
                max_iter=10000)
    train_size = np.shape(X_train)[0]
    min_loss = 1e20
    best_path = model_name + '_best_model'
    for _ in tqdm(range(n_batches)):
        # Sample a batch with replacement.
        idx = np.random.randint(0, train_size, size=batch_size)
        model.partial_fit(X_train[idx, :], y_train[idx])
        # Score against the module-level validation set.
        sentence_predicted_scores = get_values(X_val, model)
        loss = get_loss(sentence_predicted_scores, y_val)
        # Persist the best model seen so far.  Context managers close the
        # file handles (the original leaked them via inline open()).
        if loss < min_loss:
            min_loss = loss
            with open(best_path, 'wb') as fh:
                pickle.dump(model, fh)
    with open(best_path, 'rb') as fh:
        final_model = pickle.load(fh)
    return final_model
def executeAlgorithm(algorithm, dataset):
    """Train a classifier on the chosen dataset and print its accuracy.

    Parameters
    ----------
    algorithm : int
        7 selects the custom ``NeuralNetwork``; anything else the sklearn MLP.
    dataset : int
        1 for the iris data, 2 for the diabetes data.

    Raises
    ------
    ValueError
        If ``dataset`` is neither 1 nor 2.  (The original fell through and
        crashed later with a NameError on the undefined ``data``.)
    """
    if dataset == 1:
        data, targets, classes = get_iris()
    elif dataset == 2:
        data, targets, classes = get_diabetes()
    else:
        raise ValueError("dataset must be 1 (iris) or 2 (diabetes)")
    # Hold out 30% of the data for testing.
    train_data, test_data, train_target, test_target = train_test_split(
        data, targets, test_size=.3)
    if algorithm == 7:
        classifier = NeuralNetwork()
        model = classifier.fit(train_data, train_target, classes)
    else:
        classifier = mlp()
        model = classifier.fit(train_data, train_target)
    # target_predicted is the array of predictions for the held-out data.
    target_predicted = model.predict(test_data)
    # Count exact matches between predictions and the ground truth.
    count = sum(1 for predicted, actual in zip(target_predicted, test_target)
                if predicted == actual)
    accuracy = get_accuracy(count, len(test_data))
    print("Accuracy: {:.2f}%".format(accuracy))
def __init__(self, fname):
    """Instantiate an ensemble of sklearn neural nets from pickled params.

    Parameters
    ----------
    fname : str
        Either a directory (expects ``<fname>/netparams-<fname>.pckl``
        inside it) or a direct path to the pickle file.

    The pickle must hold a dict with keys: ``hidden_layer_sizes``,
    ``activation``, ``Nensemble``, ``layer_units``, ``weights``,
    ``biases`` and ``preconditioning``.
    """
    if os.path.isdir(fname):
        # Directory layout convention: <fname>/netparams-<fname>.pckl
        open_name = "{}/netparams-{}.pckl".format(fname, fname)
    else:
        open_name = fname
    # The with-block closes the file; the original's extra f.close()
    # inside it was redundant and has been removed.
    with open(open_name, "rb") as f:
        data = pickle.load(f)
    ensemble = [mlp(hidden_layer_sizes=data["hidden_layer_sizes"],
                    activation=data["activation"])
                for _ in range(data["Nensemble"])]
    for model in range(data["Nensemble"]):
        # Initialize sklearn's internal state so weights can be injected
        # directly without ever calling fit().
        ensemble[model]._random_state = check_random_state(
            ensemble[model].random_state)
        ensemble[model]._initialize(y=np.zeros((1, 2)),
                                    layer_units=data["layer_units"])
        for ii in range(len(data["layer_units"]) - 1):
            ensemble[model].coefs_[ii] = data["weights"][model][ii]
            ensemble[model].intercepts_[ii] = data["biases"][model][ii]
    self.ensemble = ensemble
    self.Nens = data["Nensemble"]
    self.Xdim = data["layer_units"][0]
    self.preconditioning = data["preconditioning"]
def MLP(data_directory, model_dir, features):
    """Grid-search an MLP on band-gap data, persist and report the best.

    Loads pre-processed train/test splits, runs a 5-fold grid search over
    activation and alpha, dumps the best estimator to ``model_dir`` (named
    after feature count and test score), and prints a DataFrame comparing
    predicted vs. PBE band gaps.

    Parameters
    ----------
    data_directory : path passed to ``pre`` to load the data.
    model_dir : directory chdir'ed into before saving the model.
    features : feature list, possibly refined by ``pre``.
    """
    X_train, X_test, y_train, y_test, predict_X, features = pre(
        data_directory, features)
    os.chdir(model_dir)
    model = mlp(random_state=1, max_iter=10000)
    grid = gs(estimator=model,
              param_grid={
                  'hidden_layer_sizes': [(500, 500)],
                  'activation': ['logistic', 'tanh', 'relu'],
                  # ln-spaced alphas e^-8 .. e^-1 (2.303 ~ ln 10).
                  'alpha': np.exp(2.303 * np.arange(-8, 0)),
                  'learning_rate': ['constant']
              },
              cv=5,
              n_jobs=6)
    grid.fit(X_train, y_train)
    print(grid.best_params_)
    # Compute the test score once instead of twice (each call re-predicts
    # the whole test set).
    test_score = grid.best_estimator_.score(X_test, y_test)
    print(test_score)
    joblib.dump(grid.best_estimator_,
                'mlp_%d_%.4f.m' % (len(features), test_score))
    df = pd.DataFrame(columns=['ml_bandgap', 'pbe_bandgap'])
    df['pbe_bandgap'] = y_test
    df['ml_bandgap'] = grid.best_estimator_.predict(X_test)
    print(df)
def predict_mlp(X_train, X_test, y_train, y_test):
    """Fit an MLP classifier on the training split, report its accuracy on
    the test split via ``calc_accuracy``, and return the fitted model."""
    classifier = mlp()
    print("mlp started")
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    calc_accuracy("MLP classifier", y_test, predictions)
    return classifier
def neural_networks(M, m, D, d, feature_mean, diag, accuracy):
    """Train and evaluate an sklearn MLP classifier, printing timings,
    accuracy, 5-fold cross-validation stats and a classification report.

    Parameters
    ----------
    M, D : training features / labels.
    m, d : test features / labels.
    feature_mean, diag : full dataset used for cross-validation.
    accuracy : list
        Mutated in place -- the test accuracy is appended (side effect
        relied on by the caller).
    """
    from sklearn.neural_network import MLPClassifier as mlp
    training_start = t.time()
    nn = mlp()
    nn.fit(M, D)
    training_end = t.time()
    print("\nNeural Networks\nTraining time: {0:.0000001} sec".format(
        training_end - training_start))
    testing_start = t.time()
    p = nn.predict(m)
    testing_end = t.time()
    print("Testing/Prediction time: {0:.0000001} sec".format(testing_end -
                                                             testing_start))
    # (The original's dead `validation = []` assignment was removed -- it
    # was immediately overwritten by cross_val_score.)
    validation = cross_val_score(nn, feature_mean, diag, cv=5)
    # Compute the test accuracy once and reuse it.
    acc = accuracy_score(p, d)
    accuracy.append(acc)
    print("Accuracy: {0:.01%}".format(acc))
    print("Cross validation result: {0:.01%} (+/- {1:.01%})".format(
        num.mean(validation), num.std(validation) * 2))
    print(classification_report(d, p))
def _neural_network(self, X, y):
    """Train the final-stage MLP classifier.

    Parameters
    ----------
    X : numpy ndarray
        Feature matrix.
    y : numpy ndarray
        Label vector.

    Returns
    -------
    Trained MLPClassifier instance.
    """
    classifier = mlp(**self.mlp_paras)
    classifier.fit(X, y)
    # To persist the trained model: import joblib at the top of this
    # script and call joblib.dump(classifier, "mlp.pkl").
    return classifier
def _mlp(t, min_freq, save=False):
    """Return ('mlp', classifier): freshly trained and persisted when
    ``save`` is True, otherwise loaded from disk."""
    if not save:
        return ('mlp', load_classifier(t, 'mlp', min_freq))
    trained = mlp().fit(records, labels)
    save_classifier(trained, t, 'mlp', min_freq)
    return ('mlp', trained)
def classifica_bd(grupos, attr_cluster, porc_trein, folds):
    """Run the MLP-based classifier over every cluster group.

    For each group, drops the cluster-label column, trains/evaluates via
    ``classificador``, and collects (cluster labels, accuracy) tuples.
    """
    resultados = []
    for grupo in grupos:
        atributos = grupo.drop([attr_cluster], axis=1)
        rotulos = grupo[attr_cluster].unique()
        avaliacao = classificador(mlp(max_iter=2000), atributos,
                                  porc_trein, folds)
        resultados.append((rotulos, avaliacao.acuracia))
    return resultados
def __get_base_estimator__(self):
    """Return a freshly configured MLPClassifier used as the base estimator.

    Note: the original passed ``hidden_layer_sizes=(100)``, which is the
    int 100, not a one-element tuple; sklearn coerces a scalar to [100],
    so writing ``(100,)`` makes the intended single 100-unit hidden layer
    explicit without changing behavior.
    """
    return mlp(hidden_layer_sizes=(100,),
               activation='relu',
               solver='adam',
               learning_rate='constant',
               learning_rate_init=1e-3,
               early_stopping=True,
               max_iter=2000)
def __init__(self, state_set_size, action_set_size, epsilon):
    """Set up the agent: state/action sizes, empty replay memory, and an
    incrementally trainable MLP primed with one dummy sample so that
    ``partial_fit`` knows the full class layout up front.

    Note: ``epsilon`` is accepted but not stored by this constructor.
    """
    self.states = state_set_size
    self.actions = action_set_size
    self.memory = []
    self.last_100 = []
    # Dummy sample used only to initialize the network's shapes/classes.
    dummy_X = np.ones((1, state_set_size))
    dummy_Y = np.array([1, 0]).reshape((1, action_set_size))
    self.model = mlp(random_state=2, max_iter=1000)
    self.model.partial_fit(dummy_X, dummy_Y, classes=np.unique(dummy_Y))
def buildClassifier(self, X_features, Y_train):
    """Train and return an SGD-based MLP classifier on the given features."""
    settings = dict(solver='sgd',
                    learning_rate_init=0.1,
                    max_iter=20,
                    hidden_layer_sizes=(100,),
                    random_state=1,
                    verbose=True)
    model = mlp(**settings)
    model.fit(X_features, Y_train)
    return model
def create_model(self, model_type, parameters):
    """Instantiate an estimator by short name and apply ``parameters``.

    Parameters
    ----------
    model_type : str
        One of 'lr', 'svm', 'mlp', 'rf', 'xgb'.
    parameters : dict
        Passed to ``set_params`` on the new estimator.

    Raises
    ------
    ValueError
        For an unknown ``model_type``.  (The original fell through and
        raised UnboundLocalError on ``model`` instead.)
    """
    if model_type == 'lr':
        model = lr()
    elif model_type == 'svm':
        model = svm()
    elif model_type == 'mlp':
        model = mlp()
    elif model_type == 'rf':
        model = rf()
    elif model_type == 'xgb':
        model = xgb()
    else:
        raise ValueError("unknown model_type: {!r}".format(model_type))
    return model.set_params(**parameters)
def select_classify():
    """Build the list of candidate classifiers to compare: naive Bayes,
    an entropy decision tree, an 8-NN with Manhattan distance, and a
    tanh MLP trained with SGD."""
    bayes = naive()
    decision_tree = tree(criterion="entropy")
    neighbours = knn(n_neighbors=8, weights='uniform', metric="manhattan")
    network = mlp(hidden_layer_sizes=(128,),
                  alpha=0.01,
                  activation='tanh',
                  solver='sgd',
                  max_iter=300,
                  learning_rate='constant',
                  learning_rate_init=0.001)
    return [bayes, decision_tree, neighbours, network]
def add_model(self, model_type):
    # Register an estimator (and, where applicable, its hyper-parameter
    # search grid) under `model_type`.  Known types: 'lr', 'ridge',
    # 'lasso', 'svm', 'mlp', 'xgb', 'rf'.  An unrecognised type is
    # silently ignored (no else branch) -- presumably intentional; verify
    # with callers.  Mutates self.models and self.param_grid in place.
    if model_type == 'lr':
        self.models.append((model_type, lr(normalize=True)))
    elif model_type == 'ridge':
        self.models.append((model_type, rc(normalize=True, cv=None)))
    elif model_type == 'lasso':
        self.models.append((model_type, la(normalize=True)))
    elif model_type == 'svm':
        self.models.append((model_type, svm()))
        # Fixed RBF kernel; only C is really searched.
        self.param_grid['svm'] = {
            'kernel': ['rbf'],
            'C': range(10, 100, 10),
            'epsilon': [0.01]
        }
    elif model_type == 'mlp':
        self.models.append((model_type, mlp()))
        self.param_grid['mlp'] = {
            'hidden_layer_sizes': [(16, 16, 16, 16, 16), (16, 16, 16, 16)],
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'solver': ['lbfgs', 'adam'],
            'alpha': [0.001, 0.01],
            'learning_rate': ['constant', 'invscaling', 'adaptive'],
            'learning_rate_init': [0.001, 0.01, 0.1],
            #'early_stopping':[True,False],
            #'validation_fraction':[0.1,0.05,0.2],
            #'max_iter':[200,1000,2000]
        }
    elif model_type == 'xgb':
        self.models.append((model_type, xgb()))
        self.param_grid[model_type] = {
            'max_depth': range(5, 15, 2),
            'min_child_weight': range(1, 6, 2),
            'n_estimators': range(10, 50, 10),
            'learning_rate': [0.01, 0.05, 0.1],
            'n_jobs': [4],
            'reg_alpha': [0, 0.005, 0.01],
            'subsample': [0.8, 1],
            'colsample_bytree': [0.8, 1]
        }
    elif model_type == 'rf':
        self.models.append((model_type, rf()))
        self.param_grid[model_type] = {
            'n_estimators': [10, 100, 500],
            #'max_depth':range(3,10,2),
            #'min_child_weight':range(1,6,2),
            #'learning_rate':[0.01,0.05,0.1]
        }
def get_classifier(self, trusted_triples):
    """Train a relation classifier from entity-pair word embeddings.

    For each trusted (s, p, o, source, support, abstracts) tuple whose
    subject and object both exist in the word2vec vocabulary, the
    concatenated embeddings form one training row and the predicate ``p``
    is its label.  Returns the fitted MLP classifier.
    """
    vectors = KeyedVectors.load_word2vec_format(self.vectors_model,
                                                binary=True)
    X, y = [], []
    for (s, p, o, suorce, support, abstracts) in trusted_triples:
        skey = self.clean_for_embeddings(s).replace(' ', '_')
        okey = self.clean_for_embeddings(o).replace(' ', '_')
        # Skip triples whose endpoints are out of vocabulary.
        if skey not in vectors or okey not in vectors:
            continue
        X.append(np.concatenate((vectors[skey], vectors[okey]), axis=None))
        y.append(p)
    clf = mlp(hidden_layer_sizes=(100,))
    clf.fit(np.array(X), y)
    return clf
def init_model(modeltype):
    """Create an untrained regression model.

    Parameters
    ----------
    modeltype : str
        'mlp' for a feed-forward neural-network regressor, 'svm' for a
        support-vector regressor.

    Raises
    ------
    ValueError
        For any other ``modeltype``.  (The original fell through and
        crashed with UnboundLocalError on the return.)
    """
    if modeltype == 'mlp':
        # Feedforward Neural Network Regression Model
        regression_model = mlp(hidden_layer_sizes=(100, 50),
                               activation='relu',
                               solver='adam',
                               alpha=0.5,
                               batch_size='auto',
                               learning_rate='adaptive',
                               learning_rate_init=0.001,
                               power_t=0.5,
                               max_iter=1000,
                               shuffle=True,
                               random_state=None,
                               tol=0.0001,
                               verbose=False,
                               warm_start=False,
                               momentum=0.9,
                               nesterovs_momentum=True,
                               early_stopping=False,
                               validation_fraction=0.1,
                               beta_1=0.9,
                               beta_2=0.999,
                               epsilon=1e-08,
                               n_iter_no_change=10)
    elif modeltype == 'svm':
        # Support Vector Machine Regression Model
        regression_model = svm(kernel='rbf',
                               C=1e6,
                               epsilon=0.1,
                               gamma='auto',
                               tol=0.001,
                               cache_size=2000,
                               shrinking=True,
                               verbose=False,
                               max_iter=-1)
    else:
        raise ValueError("modeltype must be 'mlp' or 'svm', got %r"
                         % (modeltype,))
    return regression_model
def model_stop(df):
    """Create a travel-time MLP model from a dataframe.

    Drops non-positive travel times and trims the top/bottom 5% outliers,
    then standardises the features and target before fitting.

    Returns
    -------
    (model, X, features, scaler_X, scaler_Y, Y_real)

    BUG FIXED: the original computed the quantile filters into ``X`` but
    then fit the scalers and model on the unfiltered ``df``, so the
    outlier trimming had no effect; the second filter also overwrote the
    first instead of combining with it.
    """
    df = df[df['traveltime'] > 0]
    lo = df['traveltime'].quantile(0.05)
    hi = df['traveltime'].quantile(0.95)
    # Keep only rows strictly inside the 5th..95th percentile band.
    df = df[(df['traveltime'] > lo) & (df['traveltime'] < hi)]
    features = ['rain', 'temp', 'hour', 'day']
    scaler_X = ss()
    X = scaler_X.fit_transform(df[features])
    scaler_Y = ss()
    Y_real = df['traveltime']
    Y = scaler_Y.fit_transform(df['traveltime'].values.reshape(-1, 1))
    model = mlp().fit(X, Y)
    return model, X, features, scaler_X, scaler_Y, Y_real
# NOTE(review): script fragment -- scale, X_train, X_test, X, y_train and
# y_test are defined earlier in the file/session.
scale.fit(X_train)
X_train = scale.transform(X_train)
X_test = scale.transform(X_test)
#while training on whole dataset, trained the whole dataset on the performance of svc
scale2 = StandardScaler()
scale2.fit(X)
X = scale2.transform(X)
#testing score of multi layered perceptron
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.model_selection import learning_curve
# Learning-curve scores for a small (25, 1) logistic MLP.
cv_score = learning_curve(
    mlp(activation='logistic',
        hidden_layer_sizes=(25, 1),
        solver='lbfgs',
        alpha=.01,
        max_iter=400), X_train, y_train)
#training score of multi layered perceptron
# The larger (50, 6) MLP below is the one actually fitted and evaluated.
classifier = mlp(activation='logistic',
                 hidden_layer_sizes=(50, 6),
                 solver='lbfgs',
                 alpha=.1,
                 max_iter=400)
classifier.fit(X_train, y_train)
#evaluation of mlp
from sklearn.metrics import confusion_matrix as cmm
cmm(y_test, classifier.predict(X_test))
def train(X, y):
    """Fit an MLP with five hidden layers on (X, y) and return it."""
    layers = (1024, 2048, 1024, 512, 256)
    network = mlp(hidden_layer_sizes=layers, max_iter=100)
    network.fit(X, y)
    return network
def bpnn(X, Y, epochs=20):
    """Train a back-propagation neural network and report its hold-out score.

    Splits (X, Y) into train/test parts, fits an MLP for at most ``epochs``
    iterations, prints the test score, and returns the fitted model.
    (The original returned None, discarding the trained network --
    returning it is backward compatible for callers that ignored the
    return value.)
    """
    model = mlp(max_iter=epochs, verbose=True)
    X_tr, X_te, Y_tr, Y_te = train_test_split(X, Y)
    model.fit(X_tr, Y_tr)
    print(model.score(X_te, Y_te))
    return model
y_train, test_size=.1, random_state=42) testdata = pd.read_csv( '/mnt/d/Work/Acad/BTP/data/testGreenBit/feature_patches.csv', header=None) # testdata = pd.read_csv('/mnt/d/Work/Acad/BTP/data/testGreenBit/fractal_feature.csv',header=None) # testdata2 = pd.read_csv('/mnt/d/Work/Acad/BTP/data/testGreenBit/feature.csv',header=None) # testdata = testdata.join(testdata2.iloc[:,2:], lsuffix='_caller', rsuffix='_other') testdata.dropna(inplace=True) X = testdata.iloc[:, 3:] y = testdata.iloc[:, 2] X = scaler.transform(X) clf = mlp(hidden_layer_sizes=( 100, 20, ), max_iter=1000, verbose=1) clf = clf.fit(X_train, y_train) print('Validation Acc: ', clf.score(X_val, y_val)) print('Test Acc: ', clf.score(X, y)) y_test_prob = pd.DataFrame(clf.predict_proba(X)) sample = pd.DataFrame(testdata.iloc[:, 1]).reset_index() joined = sample.join(y_test_prob, lsuffix='a') prob_pred = joined.groupby(['1a']).mean() joined = sample.join(pd.get_dummies(y).reset_index(), lsuffix='a') prob_true = joined.groupby(['1a']).mean() RCF = rcf() RCF = RCF.fit(X_train, y_train) print(RCF.score(X_train, y_train))
# Standalone script: train an MLP classifier on the UCI seeds dataset and
# print classification reports for the train and test splits.
from sklearn import model_selection
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier as mlp
import time
start_time = time.time()
input_file = 'seeds_dataset.txt'
data = np.loadtxt(input_file, delimiter=',')
# Last column is the class label; the rest are features.
X, y = data[:, :-1], data[:, -1]
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.25, random_state=5)
params = {'random_state': 0, 'hidden_layer_sizes': 50, 'max_iter': 2000}
classifier = mlp(**params)
classifier.fit(X_train, y_train)
y_test_pred = classifier.predict(X_test)
print("\n" + "#" * 40)
print("\nClassifier performance on training dataset\n")
print(classification_report(y_train, classifier.predict(X_train)))
print("#" * 40 + "\n")
print("#" * 40)
print("\nClassifier performance on test dataset\n")
print(classification_report(y_test, y_test_pred))
print("#" * 40 + "\n")
# print running time
round(clf.score(dataset_train_x, dataset_train_y), 2))) print("SVM Testing Score: {}".format( round(clf.score(dataset_test_x, dataset_test_y), 2))) # Multi-level Perceptron Neural Network clf = mlp(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08, hidden_layer_sizes=(10, 2), learning_rate='constant', learning_rate_init=0.001, max_iter=200, momentum=0.9, nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, warm_start=False) clf.fit(dataset_train_x, dataset_train_y) print("Neural Network Training Score: {}".format( round(clf.score(dataset_train_x, dataset_train_y), 2))) print("Neural Network Testing Score: {}".format( round(clf.score(dataset_test_x, dataset_test_y), 2)))
clf = LogisticRegression(solver='liblinear').fit(X_train, y_train) # In[9]: print(clf.score(X_train, y_train)) pred = clf.predict(X_test) y1 = np.ones(y_test.shape) #print(y1) print((clf.score(X_test, y_test))) #print((clf.score(X_test,y1))) # In[10]: clf2 = mlp(hidden_layer_sizes=(1000, 1000, 1000), activation='tanh', solver='adam', max_iter=2000) clf2.fit(X_train, y_train) print(clf2.score(X_train, y_train)) print(clf2.score(X_test, y_test)) # In[ ]: # In[11]: clf3 = mlp(hidden_layer_sizes=(64, 32, 64), activation='tanh', solver='adam', max_iter=500) clf3.fit(X_train, y_train) print(clf3.score(X_train, y_train))
# Script: load spectrum features and labels from CSV, train an SGD MLP,
# report train/test scores, and write the header of the results CSV.
import matplotlib.pyplot as plt
print("Reading data")
start = t.time()
x = pd.read_csv(os.path.join('Data','Csv','spectrum.csv'))
y = pd.read_csv(os.path.join('Data','Csv','labels.csv'))
elapsed = t.time() - start
print("Done reading data: " + "{0:.2f}".format(elapsed) + "s")
print("Training")
start = t.time()
# Drop the timestamp column from both frames (the slice itself is kept).
timeslices = y["time in seconds"]
y = y.drop(["time in seconds"],1)
x = x.drop(["time in seconds"],1)
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.25, random_state=27)
model = mlp(hidden_layer_sizes=(100,100),solver='sgd',verbose=True,power_t=0.99)
model.fit(x_train,y_train)
elapsed = t.time() - start
print("Done training: " + "{0:.2f}".format(elapsed) + "s")
y_pred = model.predict(x_test)
print("Training set score: %f" % model.score(x_train, y_train))
print("Test set score: %f" % model.score(x_test, y_test))
txt = True
if(txt):
    # Build a "0,1,...,127" header row for the output CSV.
    header = "0"
    for value in range(127):
        header += "," + str(value+1)
    header += "\n"
    with open(os.path.join("Data","Output","Research.csv"), "w", newline='') as result_csv:
        result_csv.write(header)
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier as mlp
from sklearn.utils import shuffle
# Read the training data from CSV.
# NOTE(review): DataFrame.ix was removed in pandas 1.0 -- this script
# requires an old pandas, or .iloc should be used instead.
df = pd.DataFrame(pd.read_csv('/mnt/sd01/sjjs_bj14/contest1/contest1_training.csv',header=None))
x_train = df.ix[:,1:]  # columns 2 onward are the data
y_train = df.ix[:,0]  # column 1 is the label
# Min-max normalisation.
min_max_scaler = preprocessing.MinMaxScaler()
x_tr_arr = x_train.values
x_tr_scaled = min_max_scaler.fit_transform(x_tr_arr)
x_train = pd.DataFrame(x_tr_scaled)
# Train the model.
model = mlp(activation='tanh',solver='adam',alpha=0.0001,learning_rate='adaptive',learning_rate_init=0.001,max_iter=200)
model.fit(x_train,y_train)
# Read the test data from CSV.
df = pd.DataFrame(pd.read_csv('/mnt/sd01/sjjs_bj14/contest1/contest1_forecast.csv',header=None))
x = df.ix[:,1:]  # columns 2 onward are the data
label = df.ix[:,0]  # column 1 is the label
# Normalise the test data.
# NOTE(review): fit_transform here refits the scaler to the test
# distribution instead of reusing the training fit -- confirm intent.
x_arr = x.values
x_scaled = min_max_scaler.fit_transform(x_arr)
x = pd.DataFrame(x_scaled)
# Predict.
y = model.predict(x)
# Write the result file.
for i in range(10000):
    f = open('/mnt/sd01/sjjs_bj14/ans.csv','r+')
    f.read()
'r').read()) route = routes['15'][1] models = [] features = ['day', 'month', 'hour', 'weekend', 'vappr'] for i in range(1, len(route) - 1): stopA = str(route[i]) stopB = str(route[i + 1]) print('Building for', stopA, 'to', stopB) df = stop_tools.stop_data(stopA, stopB) df['traveltime'] = df['actualtime_arr_to'] - df['actualtime_arr_from'] df['weekend'] = df['day'] > 4 print(df['traveltime'].mean()) Y = numpy.array([i for i in df['traveltime']]).reshape(-1, 1) transformer2 = mms().fit(Y) Y = transformer2.transform(Y) transformer1 = mms().fit(df[features]) X = transformer1.transform(df[features]) import numpy model = mlp(hidden_layer_sizes=(40, 40, 40)).fit(X, Y) models.append({ 'transformer': transformer1, 'transformer2': transformer2, 'model': model }) del (df) del (X) del (Y) with open('/data/chained_models_neural.bin', 'wb') as handle: import pickle pickle.dump(models, handle, protocol=pickle.HIGHEST_PROTOCOL)
# In[6]: from sklearn.linear_model.logistic import LogisticRegression as LR from sklearn.svm import SVC from sklearn.ensemble import RandomForestClassifier as RF from sklearn.ensemble import GradientBoostingClassifier as GBDT from sklearn.ensemble import AdaBoostClassifier as AdaBoost from sklearn.ensemble import BaggingClassifier from sklearn.ensemble import ExtraTreesClassifier as etc from sklearn.neighbors import KNeighborsClassifier as knc from sklearn.neural_network import MLPClassifier as mlp valid_data = data[3200:].reset_index() clf_gender = mlp(hidden_layer_sizes=(2, 1), verbose=0, activation='tanh') clf_gender.fit(f_tfidf[:3200], data.gender[:3200]) valid_data.gender = clf_gender.predict(f_tfidf[3200:]) # clf_age_pre = LR() # clf_age_pre.fit(f_tfidf[:3200], data.age[:3200]) clf_age = GBDT(n_estimators=300, verbose=1) clf_age.fit(f_tfidf[:3200], data.age[:3200]) valid_data.age = clf_age.predict(f_tfidf[3200:]) clf_location = GBDT(n_estimators=300, verbose=1) clf_location.fit(f_tfidf[:3200], data.location[:3200]) valid_data.location = clf_location.predict(f_tfidf[3200:]) # # 输出到temp.csv
def treinar(self, x, y):
    """Fit an MLP (up to 2000 iterations) on (x, y) and return it."""
    modelo = mlp(max_iter=2000)
    return modelo.fit(x, y)