def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP with 6 nodes in the hidden layer

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return: forecasts for the next fh points
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity',
                         solver='adam', max_iter=100, learning_rate='adaptive',
                         learning_rate_init=0.001, random_state=42)
    model.fit(x_train, y_train)

    last_prediction = model.predict(x_test)[0]
    for i in range(fh):
        y_hat_test.append(last_prediction)
        # slide the input window one step and append the latest forecast
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)
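# A minimal usage sketch of mlp_bench, assuming the series has been windowed
# into lagged inputs (the toy series and window size are illustrative, not
# part of the original source):
import numpy as np

series = np.sin(np.arange(60) / 5.0)                      # toy series
window = 8
X = np.array([series[i:i + window] for i in range(len(series) - window)])
y = series[window:]
x_test = X[-1:].copy()                                    # last observed window (copy: mlp_bench mutates it)
forecast = mlp_bench(X[:-1], y[:-1], x_test, fh=6)
print(forecast)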
def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
def _create_first_population(self):
    self._current_population = []
    for _ in range(self._n_individuals):
        mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture,
                           alpha=10 ** -10, max_iter=1)
        # one dummy fit so that coefs_/intercepts_ get allocated
        mlp.fit([np.random.randn(self._n_features)],
                [np.random.randn(self._n_actions)])
        mlp.out_activation_ = 'softmax'
        self._current_population.append([mlp, 0])
def construct_train(train_length, **kwargs):
    """ Train and test model with given input window and number of neurons in layer """
    start_cur_position = 0
    steps, steplen = observations.size // (2 * train_length), train_length

    if 'hidden_layer' in kwargs:
        network = MLPRegressor(hidden_layer_sizes=kwargs['hidden_layer'])
    else:
        network = MLPRegressor()

    quality = []

    # fit model - configure parameters
    network.fit(observations[start_cur_position:train_length][:, 1].reshape(1, train_length),
                observations[:, 1][start_cur_position:train_length].reshape(1, train_length))

    parts = []

    # calculate predicted values
    # for each step add all predicted values to a list
    # TODO: add some parallelism here
    for i in range(steps):
        parts.append(network.predict(observations[start_cur_position:train_length][:, 1]))
        start_cur_position += steplen
        train_length += steplen

    # estimate model quality using mean squared error
    result = np.array(parts).flatten().tolist()
    for valnum, value in enumerate(result):
        quality.append((value - observations[valnum][1]) ** 2)

    return sum(quality) / len(quality)
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        assert_greater(mlp.score(X, y), 0.95)
def GetOptimalCLF2(train_x, train_y, rand_starts=8):
    '''
    Gets the optimal CLF function based on fixed settings

    Parameters
    ------------------------
    train_x - np.array
        Training feature vectors
    train_y - np.array
        Training label vectors
    rand_starts - int
        Number of random starts to do
        Default - 8 for 95% confidence and best 30%

    Returns
    ------------------------
    max_clf - sklearn function
        Optimal trained artificial neural network
    '''
    #### Get number of feature inputs of training vector
    n_input = train_x.shape[1]

    #### Set initial loss value
    min_loss = 1e10

    #### Perform number of trainings according to random start set
    for i in range(rand_starts):
        #### Print current status
        print("Iteration number {}".format(i + 1))

        #### Initialize ANN network
        clf = MLPRegressor(hidden_layer_sizes=(int(round(2 * np.sqrt(n_input), 0)), 1),
                           activation='logistic', solver='sgd',
                           learning_rate='adaptive', max_iter=100000000,
                           tol=1e-10, early_stopping=True,
                           validation_fraction=1 / 3.)

        #### Fit data
        clf.fit(train_x, train_y)

        #### Get current loss
        cur_loss = clf.loss_

        #### Save current clf if loss is minimum
        if cur_loss < min_loss:
            #### Set min_loss to a new value
            min_loss = cur_loss
            #### Set max_clf to new value
            max_clf = clf

    return max_clf
def MLP_Regressor(train_x, train_y):
    clf = MLPRegressor(alpha=1e-05, batch_size='auto',
                       beta_1=0.9, beta_2=0.999, early_stopping=False,
                       epsilon=1e-08, hidden_layer_sizes=([8, 8]),
                       learning_rate='constant', learning_rate_init=0.01,
                       max_iter=500, momentum=0.9, nesterovs_momentum=True,
                       power_t=0.5, random_state=1, shuffle=True, tol=0.0001,
                       validation_fraction=0.1, verbose=False, warm_start=False)
    clf.fit(train_x, train_y)
    # accuracy_score is a classification metric; use clf.score (R^2) for regression
    # score = metrics.accuracy_score(clf.predict(train_x), train_y)
    # print(score)
    return clf
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        if activation == 'identity':
            assert_greater(mlp.score(X, y), 0.84)
        else:
            # Non-linear models perform much better than a linear bottleneck:
            assert_greater(mlp.score(X, y), 0.95)
def train_model(x_train, y_train, alpha=1e-3, hid_layers=[512], max_iter=100):
    """
    Train model on training data.

    :param x_train: training examples
    :param y_train: target variables
    :param alpha: L2 regularization coefficient
    :param hid_layers: hidden layer sizes
    :param max_iter: maximum number of iterations in L-BFGS optimization
    :return: a model trained with a neural network
    """
    nn_model = MLPRegressor(solver='lbfgs', hidden_layer_sizes=hid_layers,
                            alpha=alpha, max_iter=max_iter,
                            activation="relu", random_state=1)
    nn_model.fit(x_train, y_train)
    return nn_model
def __init__(self, num_inputs, num_outputs):
    self.nx = num_inputs
    self.ny = num_outputs
    self.net = MLPRegressor(hidden_layer_sizes=(50, 10), max_iter=1,
                            solver='sgd', learning_rate='constant',
                            learning_rate_init=0.001, warm_start=True,
                            momentum=0.9, nesterovs_momentum=True)
    self.initialize_network()

    # set experience replay
    self.mbsize = 128  # mini-batch size
    self.er_s = []
    self.er_a = []
    self.er_r = []
    self.er_done = []
    self.er_sp = []
    self.er_size = 2000  # total size of mb, implemented as a queue
    self.whead = 0  # write head
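# The warm_start=True / max_iter=1 combination above performs one epoch of SGD
# per fit() call. A sketch of the equivalent incremental pattern using
# partial_fit instead (the toy data shapes here are illustrative assumptions):
import numpy as np
from sklearn.neural_network import MLPRegressor

net = MLPRegressor(hidden_layer_sizes=(50, 10), solver='sgd',
                   learning_rate_init=0.001)
X = np.random.rand(128, 4)
y = np.random.rand(128, 2)
for _ in range(10):
    net.partial_fit(X, y)  # one gradient pass per call, no warm_start needed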
def __init__(self):
    self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False,
                            warm_start=True)
    self._entradas_entrenamiento = []
    self._salidas_esperadas_entrenamiento = []
    # TD-lambda parameter
    self.lambdaCoefficient = 0.9
def train(self):
    print("DEB Training with TSnew")
    self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto',
                            beta_1=0.9, beta_2=0.999, early_stopping=False,
                            epsilon=1e-08,
                            hidden_layer_sizes=len(self.TSnew_Y.columns),
                            learning_rate='constant', learning_rate_init=0.001,
                            max_iter=200, momentum=0.9, nesterovs_momentum=True,
                            power_t=0.5, random_state=1, shuffle=True,
                            solver='lbfgs', tol=0.0001, validation_fraction=0.1,
                            verbose=False, warm_start=False)
    self.MLP.fit(self.TSnew_X, self.TSnew_Y)
class Ann:
    def __init__(self):
        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False,
                                warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):
        tableros.reverse()
        for i in range(len(tableros)):
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:
                # final position: learn the actual game result
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                # TD-lambda update towards the previously stored value
                valorAAprender = valorEstimado + self.lambdaCoefficient * (
                    self._salidas_esperadas_entrenamiento[i - 1] - valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        self._nn.partial_fit(self._entradas_entrenamiento,
                             self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        self.path = path
        if os.path.isfile(path):
            self._nn = pickle.load(open(path, 'rb'))
        else:
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
def _create_new_nn(self, weights, biases):
    mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture,
                       alpha=10 ** -10, max_iter=1)
    # dummy fit to allocate coefs_/intercepts_ with the right shapes,
    # then overwrite them with the supplied parameters
    mlp.fit([np.random.randn(self._n_features)],
            [np.random.randn(self._n_actions)])
    mlp.coefs_ = weights
    mlp.intercepts_ = biases
    mlp.out_activation_ = 'softmax'
    return mlp
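# A minimal sketch of how such weight injection can drive a simple
# evolutionary step (the mutation helper and sigma value are assumptions for
# illustration, not part of the original source):
import numpy as np

def mutate(parent_mlp, sigma=0.05):
    """Return perturbed copies of a fitted MLPRegressor's weights and biases."""
    weights = [w + sigma * np.random.randn(*w.shape) for w in parent_mlp.coefs_]
    biases = [b + sigma * np.random.randn(*b.shape) for b in parent_mlp.intercepts_]
    return weights, biases

# child = self._create_new_nn(*mutate(parent))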
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as 'fit' for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
cur_nester = False
if parameters[7] == 1:
    cur_nester = True
cur_momentum = parameters[6]

reg = MLPRegressor(hidden_layer_sizes=hidden_layers, activation="relu",
                   solver=cur_solver, alpha=parameters[2], batch_size='auto',
                   learning_rate=cur_learning_rate,
                   learning_rate_init=parameters[4], power_t=parameters[5],
                   max_iter=200, shuffle=True, random_state=None,
                   tol=parameters[8], verbose=False, warm_start=False,
                   momentum=cur_momentum, nesterovs_momentum=cur_nester,
                   early_stopping=False, validation_fraction=0.1,
                   beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                   n_iter_no_change=10)

# score = cross_val_score(reg, partx, party, cv=3, n_jobs=multiprocessing.cpu_count())
# print(np.mean(score))
reg.fit(partx, party)
def main():
    cal_housing = fetch_california_housing()
    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature,
                                   grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                           cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    # pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)
    plt.show()
def main(args=None):
    args = arg_parser().parse_args(args)
    if args.verbosity == 1:
        level = logging.getLevelName('INFO')
    elif args.verbosity >= 2:
        level = logging.getLevelName('DEBUG')
    else:
        level = logging.getLevelName('WARNING')
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=level)
    logger = logging.getLogger(__name__)
    try:
        np.random.seed(args.random_seed)
        if args.regr_type == 'rf':
            from sklearn.ensemble import RandomForestRegressor
            regr = RandomForestRegressor(
                n_jobs=args.n_jobs,
                min_samples_leaf=args.min_samp_leaf,
                n_estimators=args.n_trees,
                max_features=args.max_features,
                max_depth=args.max_depth,
                random_state=args.random_seed,
                verbose=1 if args.verbosity >= 2 else 0)
            flatten = True
        elif args.regr_type == 'xg':
            try:
                from xgboost import XGBRegressor
            except ImportError:
                logger.warning('Need to install xgboost to use xg option')
                raise
            regr = XGBRegressor(
                n_jobs=args.n_jobs,
                n_estimators=args.n_trees,
                random_state=args.random_seed,
                max_depth=3 if args.max_depth is None else args.max_depth,
                silent=False if args.verbosity >= 2 else True)
            flatten = True
        elif args.regr_type == 'pr':
            from sklearn.linear_model import LinearRegression
            regr = LinearRegression(
                n_jobs=args.n_jobs,
                fit_intercept=True if args.poly_deg is None else False)
            flatten = False
        elif args.regr_type == 'mlr':
            from synthit.synth.mlr import LinearRegressionMixture
            regr = LinearRegressionMixture(
                3, num_restarts=args.num_restarts,
                num_workers=args.n_jobs,
                max_iterations=args.max_iterations,
                threshold=args.threshold)
            # hack to get bias term included in features
            args.poly_deg = 1 if args.poly_deg is None else args.poly_deg
            flatten = True
        elif args.regr_type == 'mlp':
            from sklearn.neural_network import MLPRegressor
            regr = MLPRegressor(
                hidden_layer_sizes=args.hidden_layer_sizes,
                max_iter=args.max_iterations,
                random_state=args.random_seed,
                verbose=True if args.verbosity >= 2 else False)
            flatten = True
        else:
            raise SynthError(
                'Invalid regressor type: {}. {{rf, xg, pr, mlr, mlp}} are the only supported options.'
                .format(args.regr_type))
        logger.debug(regr)
        ps = PatchSynth(regr, args.patch_size, args.n_samples,
                        args.ctx_radius, args.threshold, args.poly_deg,
                        args.mean, args.full_patch, flatten, args.use_xyz)
        source = [ps.image_list(sd) for sd in args.source_dir]
        target = ps.image_list(args.target_dir)
        if any([len(source_) != len(target) for source_ in source]):
            raise SynthError('Number of source and target images must be equal.')
        if args.mask_dir is not None:
            masks = ps.image_list(args.mask_dir)
            if len(masks) != len(target):
                raise SynthError(
                    'If masks are provided, the number of masks must be equal '
                    'to the number of images.')
            source = [[nib.Nifti1Image(src.get_data() * mask.get_data(),
                                       src.affine, src.header)
                       for (src, mask) in zip(source_, masks)]
                      for source_ in source]
            target = [nib.Nifti1Image(tgt.get_data() * mask.get_data(),
                                      tgt.affine, tgt.header)
                      for (tgt, mask) in zip(target, masks)]
        else:
            masks = [None] * len(target)
        if not args.cross_validate:
            ps.fit(source, target, masks)
            outfile = 'trained_model.pkl' if args.output is None else args.output
            logger.info('Saving trained model: {}'.format(outfile))
            joblib.dump(ps, outfile)
        else:
            for i in range(len(target)):
                src = [[src_ for k, src_ in enumerate(source_) if i != k]
                       for source_ in source]
                tgt = [tgt_ for k, tgt_ in enumerate(target) if i != k]
                msk = [msk_ for k, msk_ in enumerate(masks) if i != k]
                ps.fit(src, tgt, msk)
                if args.output is not None:
                    name, ext = os.path.splitext(args.output)
                    outfile = name + '_{}'.format(i) + ext
                else:
                    outfile = 'trained_model_{}.pkl'.format(i)
                logger.info('Saving trained model: {}'.format(outfile))
                joblib.dump(ps, outfile)
        return 0
    except Exception as e:
        logger.exception(e)
        return 1
class NeuralNetwork:
    ################# Fields #######################
    # dataset_filename: string - path to dataset
    # header: list - header of the dataset
    # enumerable_columns: list - the enumerable columns
    # df: matrix - data set
    # training_set: matrix - training set
    # test_set: matrix - test set
    # TSnew_X: matrix - training set of TSnew (see documentation)
    # TSnew_Y: matrix - training set of TSnew (see documentation)
    # dim_random_subset: int - number of features to set to 0 (see documentation)
    # repeatSometimes: int - number of for cycles (see documentation)

    def __init__(self, repeatSometimes=2, dim_random_subset=2):
        # variables initialization
        self.enumerable_columns = []
        self.dataset_filename = ""
        self.header = []
        self.df = pandas.DataFrame()
        self.trainSet = pandas.DataFrame()
        self.testSet = pandas.DataFrame()
        self.TSnew_X = pandas.DataFrame()
        self.TSnew_Y = pandas.DataFrame()
        self.repeatSometimes = repeatSometimes
        self.dim_random_subset = dim_random_subset

        # This code really needs much time, so I save some computations
        if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)):
            self.readDataset()
            self.discretization()
            self.preprocess()

            # creating TSnew
            self.createTrainingAndTestSet()
            self.createTSnew()

            # backup encoded sets
            self.writeCSV()
        else:
            self.readCSV()

        # training and test
        self.train()
        self.predict()

    def readDataset(self):
        print("DEB Read dataset")
        with open('header.txt') as f:
            self.header = f.read().split(',')
        print(self.header)
        with open('dataset.txt') as f:
            self.dataset_filename = f.read()
        print(self.dataset_filename)
        self.df = pandas.read_csv(self.dataset_filename, names=self.header)
        print('Dataset with {} entries'.format(len(self.df)))

    ############# Preprocessing ##########################

    # helper function (should not be called from other functions)
    def discretize(self, column):
        print("DEB Discretize column " + column)
        sorted_col = sorted(column)
        l = len(column)
        n = int(numpy.floor(l / 2))
        if l % 2 == 0:
            median_1 = numpy.median(sorted_col[0:n])
            median_2 = numpy.median(sorted_col[n:])
        else:
            median_1 = numpy.median(sorted_col[0:(n + 1)])
            median_2 = numpy.median(sorted_col[(n + 1):])
        iqr = median_2 - median_1
        h = 2 * iqr * (1 / numpy.cbrt(l))  # Freedman-Diaconis bin width
        if h > 0:
            bins_number = int(numpy.ceil((column.max() - column.min()) / h))
            new_col, bins = pandas.cut(column, bins_number, labels=False,
                                       retbins=True, include_lowest=False)
        else:
            new_col = column
            bins = []
        return new_col, bins

    # helper function (should not be called from other functions)
    def normalize(self, column):
        print("DEB Normalize")
        h = abs(column.min())
        new_col = column + h
        return new_col

    def discretization(self):
        print("DEB Discretization")
        replacements = {}
        bins = {}
        for i in range(0, self.df.shape[1]):  # for each feature
            bins[i] = []
            col = self.df.as_matrix()[:, i]
            flag_str = False
            flag_float = False
            flag_negative = False
            for j in col:
                if type(j) is str:
                    flag_str = True
                elif type(j) is float:
                    flag_float = True
                elif type(j) is int and j < 0:
                    flag_negative = True
            if flag_str:
                continue
            elif flag_negative:
                new_col = self.normalize(col)
                replacements[i] = new_col
                bins[i] = []
            elif flag_float:
                new_col, new_bins = self.discretize(col)
                replacements[i] = new_col
                bins[i] = new_bins
        for k, v in replacements.items():
            self.df.iloc[:, k] = v

    def preprocess(self, removeColumnsWithMissingValues=False):
        print("DEB Preprocessing")
        m = self.df.as_matrix()
        # it is possible to encode enumerable features and to remove missing values
        with open('enumerable_columns.txt') as f:  # e.g., self.enumerable_columns = [0, 5, 8]
            self.enumerable_columns = f.read()
        if ',' in self.enumerable_columns:
            self.enumerable_columns = list(map(int, self.enumerable_columns.split(',')))
        else:
            self.enumerable_columns = [int(self.enumerable_columns)]
        print("enumerable columns are: " + str(self.enumerable_columns))
        le = preprocessing.LabelEncoder()
        for col in self.enumerable_columns:  # if the column is enumerable
            self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]])  # A -> 0, B -> 1, ...
        # remove cols with missing values (NaN), even though you risk reducing the dataset too much
        if removeColumnsWithMissingValues:
            for i in range(0, m.shape[1]):
                if True in m[:, i]:
                    self.df = numpy.delete(self.df, 0, i)  # delete column

    ############## MLP architecture #######################

    def createTrainingAndTestSet(self):
        print("DEB Create Training set. Using formula 80-20%")
        self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20)

    # heart of the algorithm!
    def createTSnew(self):
        print("DEB Create TS new")
        for i in range(0, self.trainSet.shape[0]):
            for j in range(0, self.repeatSometimes):
                # choose small random subset of features X_hat
                X_hat = [int(self.trainSet.shape[1] * random.random())
                         for _ in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.trainSet.as_matrix()[i, :])
                for feature in X_hat:
                    # set the random features to 0; X_hat holds the indices of such features
                    row[feature] = 0
                self.TSnew_X = self.TSnew_X.append(
                    pandas.DataFrame(row.reshape(-1, len(row))))  # append row to TSnew_X
                copy = numpy.copy(self.trainSet.as_matrix()[i, :])
                self.TSnew_Y = self.TSnew_Y.append(
                    pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk

    ############## Train & Predict ########################

    def train(self):
        print("DEB Training with TSnew")
        self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto',
                                beta_1=0.9, beta_2=0.999, early_stopping=False,
                                epsilon=1e-08,
                                hidden_layer_sizes=len(self.TSnew_Y.columns),
                                learning_rate='constant', learning_rate_init=0.001,
                                max_iter=200, momentum=0.9, nesterovs_momentum=True,
                                power_t=0.5, random_state=1, shuffle=True,
                                solver='lbfgs', tol=0.0001, validation_fraction=0.1,
                                verbose=False, warm_start=False)
        self.MLP.fit(self.TSnew_X, self.TSnew_Y)

    def predict(self):
        print("DEB Test")
        testSetNew_X = pandas.DataFrame()
        testSetNew_Y = pandas.DataFrame()

        # preparing the test set - here you do the same as in createTSnew:
        if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)):
            for i in range(0, self.testSet.shape[0]):
                # choose small random subset of features X_hat
                X_hat = [int(self.testSet.shape[1] * random.random())
                         for _ in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.testSet.as_matrix()[i, :])
                for feature in X_hat:
                    # set the random features to 0; X_hat holds the indices of such features
                    row[feature] = 0
                testSetNew_X = testSetNew_X.append(pandas.DataFrame(row.reshape(-1, len(row))))
                copy = numpy.copy(self.testSet.as_matrix()[i, :])
                testSetNew_Y = testSetNew_Y.append(
                    pandas.DataFrame(copy.reshape(-1, len(copy))))  # Y = x1...xk
            testSetNew_X.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
            testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        else:
            # if the needed DataFrames have already been calculated, simply load them from disk
            testSetNew_X = pandas.read_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
            testSetNew_Y = pandas.read_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)

        # predictions
        self.MLP.predict(testSetNew_X)
        print("Score of method (repetitions={}, subset={}): {}%".format(
            self.repeatSometimes, self.dim_random_subset,
            self.MLP.score(testSetNew_X, testSetNew_Y) * 100))

    ########################## Helper functions ####################

    def writeCSV(self):
        print("DEB WriteCSV")
        self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))

    def readCSV(self):
        print("DEB ReadCSV")
        self.trainSet = pandas.read_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.testSet = pandas.read_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_X = pandas.read_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_Y = pandas.read_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
bestNeurons = 0
bestEta = 0
bestScore = float('-inf')
score = 0
for neurons in range(20, 200, 1):
    for eta in range(1, 11, 1):
        eta = eta / 10.0
        kf = KFold(n_splits=10)
        cvscore = []
        for train, validation in kf.split(X):
            X_train, X_validation = X[train, :], X[validation, :]
            y_train, y_validation = y[train], y[validation]
            # here we create the MLP regressor
            mlp = MLPRegressor(hidden_layer_sizes=(neurons,), verbose=False,
                               learning_rate_init=eta)
            # here we train the MLP
            mlp.fit(X_train, y_train)
            # now we get E_out for the validation set
            score = mlp.score(X_validation, y_validation)
            cvscore.append(score)
        # average CV score
        score = sum(cvscore) / len(cvscore)
        if score > bestScore:
            bestScore = score
            bestNeurons = neurons
            bestEta = eta
        print("Neurons " + str(neurons) + ", eta " + str(eta) +
              ". Testing set CV score: %f" % score)
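# The nested loops above implement a manual grid search with 10-fold CV; a
# sketch of the same search using GridSearchCV (the grid values here are
# illustrative, not taken from the original):
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPRegressor

param_grid = {
    'hidden_layer_sizes': [(n,) for n in range(20, 200, 20)],
    'learning_rate_init': [e / 10.0 for e in range(1, 11)],
}
search = GridSearchCV(MLPRegressor(), param_grid, cv=10)
search.fit(X, y)
print(search.best_params_, search.best_score_)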
from sklearn.model_selection import KFold
from sklearn.neural_network import MLPRegressor

reg = MLPRegressor(activation='logistic', alpha=0.0001, batch_size='auto',
                   beta_1=0.9, beta_2=0.999, early_stopping=False,
                   epsilon=1e-08, hidden_layer_sizes=100,
                   learning_rate='constant', learning_rate_init=0.001,
                   max_iter=200, momentum=0.9, n_iter_no_change=10,
                   nesterovs_momentum=True, power_t=0.5, random_state=None,
                   shuffle=True, solver='lbfgs', tol=0.0001,
                   validation_fraction=0.1, verbose=False, warm_start=False)

kf = KFold(n_splits=10)
print("Using ", kf.get_n_splits(X), " folds")

from sklearn.metrics import r2_score

avg_r2_train = []
plt.plot(X, y_poly, c='b', label='Polynomial model')
plt.legend()
plt.show()

### MLP
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(0.0, 1, 0.01).reshape(-1, 1)
y = np.sin(2 * np.pi * x)

mlp_reg = MLPRegressor(hidden_layer_sizes=(10, 3),
                       activation='relu',
                       solver='adam',
                       learning_rate='constant',
                       learning_rate_init=0.01,
                       max_iter=1000,
                       tol=0.0001)
mlp_reg.fit(x, y)

test_x = np.arange(0.0, 1, 0.05).reshape(-1, 1)
test_y = mlp_reg.predict(test_x)

plt.scatter(x, y, c='b', marker="s", label='real')
plt.scatter(test_x, test_y, c='r', marker="o", label='NN Prediction')
plt.legend()
plt.show()

### KNN
from sklearn import neighbors
model.fit(X_train_mode, y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.svm import SVR
model = SVR()
model.fit(X_train_0, y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))
model.fit(X_train_mode, y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.neural_network import MLPRegressor
model = MLPRegressor()
model.fit(X_train_0, y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))
model.fit(X_train_mode, y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.svm import LinearSVR
model = LinearSVR()
model.fit(X_train_0, y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))
model.fit(X_train_mode, y_train)
df = pd.read_csv(FILE)
mmscaler = MinMaxScaler()

dates = df['Index'].tolist()
dates = np.reshape(dates, (len(dates), 1))
dates = mmscaler.fit_transform(dates)

closes = df['Adj Close'].tolist()
closes = np.reshape(closes, (len(closes), 1))
closes = mmscaler.fit_transform(closes)
closes = closes.ravel()

SIZE = len(dates)

svr = SVR(kernel='rbf', C=C_VAL, gamma=Y_VAL)
mlp = MLPRegressor(hidden_layer_sizes=(100))
reg = LinearRegression()

for i in range(int((1 - TEST) * WINDOW), SIZE, int(WINDOW)):
    dates_train = dates[i - int((1 - TEST) * WINDOW):i]
    dates_test = dates[i:i + int(TEST * WINDOW)]
    closes_train = closes[i - int((1 - TEST) * WINDOW):i]
    closes_test = closes[i:i + int(TEST * WINDOW)]

    svr.fit(dates_train, closes_train)
    mlp.fit(dates_train, closes_train)
    reg.fit(dates_train, closes_train)

    trained_closes_svr = svr.predict(dates_train)
    tested_closes_svr = svr.predict(dates_test)
    trained_closes_mlp = mlp.predict(dates_train)
    'min_data': 1,
    'verbose': 0
}
gbm = lgb.train(params, lgb_train, num_boost_round=20,
                valid_sets=lgb_eval, early_stopping_rounds=5)
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
error3 = mean_squared_error(y_pred, y_test)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

mlp = MLPRegressor(hidden_layer_sizes=(13, 13, 13), max_iter=10000)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
error4 = mean_squared_error(y_pred, y_test)

model = svm.SVR(C=20000, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
                gamma=0.0008, kernel='rbf', max_iter=-1, shrinking=True,
                tol=0.001, verbose=False)
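# The scale-then-fit steps above can be bundled so the scaler is fitted only
# on training data and reapplied automatically at predict time; a sketch
# using sklearn's Pipeline (variable names are illustrative):
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(hidden_layer_sizes=(13, 13, 13), max_iter=10000))
pipe.fit(X_train, y_train)     # scaler statistics come from X_train only
y_pred = pipe.predict(X_test)  # X_test is scaled with the same statistics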
def main():
    # dimensions to test
    DIMENSIONS = [64, 32, 16, 8, 4, 2, 1]

    X, y = data_processing.read_data('Data/conmat_240.mat', 'Data/age_240.mat')
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.8)

    # train embeddings for each dimension
    encoders = list()
    for dimension in DIMENSIONS:
        print(str(dimension) + "-D Embedding Training")
        e_x = tf.keras.layers.Input((None, 268))
        e_o = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(dimension, activation='tanh'))(e_x)
        e = tf.keras.Model(e_x, e_o)
        d_x = tf.keras.layers.Input((None, dimension))
        d_o = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(268, activation='linear'))(d_x)
        d = tf.keras.Model(d_x, d_o)
        model = AutoEncoder(e, d)
        model.train(X_train, epochs=100, learning_rate=0.001, loss='mse')
        encoders.append((model, dimension))

    # encode train and test data using embeddings, then flatten for prediction
    embedded_train_list = list()
    embedded_test_list = list()
    for model, dim in encoders:
        embedded_train_matrix = np.zeros((len(X_train), 268 * dim))
        for i in range(len(X_train)):
            embedding_train = model.encode(X_train[i])
            embedded_train_matrix[i] = np.ndarray.flatten(embedding_train)
        embedded_train_list.append(embedded_train_matrix)
        embedded_test_matrix = np.zeros((len(X_test), 268 * dim))
        for i in range(len(X_test)):
            embedding_test = model.encode(X_test[i])
            embedded_test_matrix[i] = np.ndarray.flatten(embedding_test)
        embedded_test_list.append(embedded_test_matrix)

    # train prediction models on encoded train data, then test on encoded
    # test data and calculate Mean Squared Error
    lr_error_list = list()
    svr_error_list = list()
    mlp_error_list = list()
    lr_error_list_train = list()
    svr_error_list_train = list()
    mlp_error_list_train = list()
    for i in range(len(embedded_train_list)):
        # savemat(f'Data/neural_{DIMENSIONS[i]}.mat',
        #         {'train': embedded_train_list[i], 'test': embedded_test_list[i]})
        lr = Ridge(alpha=2).fit(embedded_train_list[i], y_train)
        svr = SVR().fit(embedded_train_list[i], np.reshape(y_train, -1))
        mlp = MLPRegressor(hidden_layer_sizes=(64, 32, 16, 8),
                           learning_rate_init=0.001,
                           max_iter=1000).fit(embedded_train_list[i],
                                              np.reshape(y_train, -1))
        predictedLR = lr.predict(embedded_train_list[i])
        predictedSV = svr.predict(embedded_train_list[i])
        predictedMLP = mlp.predict(embedded_train_list[i])
        lr_error = mean_squared_error(predictedLR, y_train)
        svr_error = mean_squared_error(predictedSV, y_train)
        mlp_error = mean_squared_error(predictedMLP, y_train)
        lr_error_list_train.append(lr_error)
        svr_error_list_train.append(svr_error)
        mlp_error_list_train.append(mlp_error)
        predictedLR = lr.predict(embedded_test_list[i])
        predictedSV = svr.predict(embedded_test_list[i])
        predictedMLP = mlp.predict(embedded_test_list[i])
        print(str(embedded_test_list[i].shape[-1] // 268) + "-D Predicted")
        lr_error = mean_squared_error(predictedLR, y_test)
        svr_error = mean_squared_error(predictedSV, y_test)
        mlp_error = mean_squared_error(predictedMLP, y_test)
        lr_error_list.append(lr_error)
        svr_error_list.append(svr_error)
        mlp_error_list.append(mlp_error)

    # plot MSE for different embedding dims and prediction methods
    width = 0.35
    plt.bar(np.arange(len(lr_error_list_train)), lr_error_list_train,
            width, label="LinReg")
    plt.bar(np.arange(len(svr_error_list_train)) + width,
            svr_error_list_train, width, label="SVR")
    plt.bar(np.arange(len(mlp_error_list_train)) + 2 * width,
            mlp_error_list_train, width, label="MLP")
    plt.ylabel("MSE")
    plt.xlabel("Dimensions")
    plt.title("Autoencoder Mean Squared Error by Embedding Dimension - Train")
    plt.xticks(np.arange(len(svr_error_list)) + width, list(DIMENSIONS))
    plt.legend(loc="best")
    plt.savefig('images/autoencoder_train')
    plt.show()

    width = 0.35
    plt.bar(np.arange(len(lr_error_list)), lr_error_list, width, label="LinReg")
    plt.bar(np.arange(len(svr_error_list)) + width, svr_error_list,
            width, label="SVR")
    plt.bar(np.arange(len(mlp_error_list)) + 2 * width, mlp_error_list,
            width, label="MLP")
    plt.ylabel("MSE")
    plt.xlabel("Dimensions")
    plt.title("Autoencoder Mean Squared Error by Embedding Dimension - Test")
    plt.xticks(np.arange(len(svr_error_list)) + width, list(DIMENSIONS))
    plt.legend(loc="best")
    plt.savefig('images/autoencoder_test')
    plt.show()
rna_clf = MLPClassifier(solver='adam', alpha=0.0001,
                        hidden_layer_sizes=(100, 4), random_state=1)
score = cross_val_score(svm, X=features_normalized, y=target_discretized2, cv=kfold)
score.mean()

# Regression
linearR = LinearRegression()
svr = SVR()
rna_reg = MLPRegressor(solver='adam', alpha=0.0001,
                       hidden_layer_sizes=(100, 4), random_state=1)

all_features = [('f_norm', features_normalized), ('f_stand', features_standard)]
all_targets_discretized = [('t_disc', target_discretized),
                           ('t_stand_disc', target_standard_discretized),
                           ('t_stand_norm', target_normalized_discretized),
                           ('t_qcut', target_discretized2)]
all_models_classification = [('SVM', svm), ('GNB', gaussianNB), ('LR', lr),
                             ('KNN', knn), ('RNA_CLF', rna_clf)]
all_models_regression = [('linearR', linearR), ('SVR', svr), ('RNA_REG', rna_reg)]
all_targets = [('t_norm', target_normalized), ('t_stand', target_standard)]
class NN(object):
    """Neural-network dynamics model (mirrors the GP interface)"""

    def __init__(self, space_dim, done_fktn, predict_change=False,
                 sample_rejection=False):
        self.input_dim = space_dim + 1
        self.output_dim = self.input_dim - 1
        self.X = None
        self.Y = None
        self.done = done_fktn
        self.type = 'NN'
        self.predict_change = predict_change
        self.sample_rejection = sample_rejection
        self.nb_samples = 6000
        self.kde = KernelDensity(
            bandwidth=10 / (space_dim * np.power(1000, 1 / space_dim)))

    def add_trajectory(self, observations, actions, rewards):
        if self.X is None:
            self.X = np.hstack((observations[:-1], actions))
            if self.predict_change:
                self.Y = np.hstack((observations[1:] - observations[:-1], rewards))
            else:
                self.Y = np.hstack((observations[1:], rewards))
        else:
            new_X = np.hstack((observations[:-1], actions))
            if self.sample_rejection:
                index = self.reject_index(new_X)
                print(index.sum(), 'samples added')
            else:
                index = np.ones(len(new_X), dtype=bool)
            self.X = np.vstack((self.X, new_X[index]))
            if self.predict_change:
                self.Y = np.vstack((self.Y, np.hstack(
                    (np.asarray(observations[1:][index]) - observations[:-1][index],
                     rewards[index]))))
            else:
                self.Y = np.vstack((self.Y, np.hstack(
                    (np.asarray(observations[1:][index]), rewards[index]))))

    def train(self):
        train_size = np.min((self.nb_samples, self.X.shape[0]))
        train_index = np.arange(self.X.shape[0], dtype=int)
        np.random.shuffle(train_index)
        train_index = train_index[:train_size]
        self.scaler = StandardScaler()
        self.scaler.fit(self.X[train_index])
        X_train = self.scaler.transform(self.X)
        self.model = MLPRegressor(hidden_layer_sizes=(200,),
                                  activation='logistic')
        self.model.fit(X_train[train_index], self.Y[train_index])

    def predict(self, observation, action):
        obs = self.scaler.transform(
            np.asarray([*observation, action]).reshape(1, -1))
        y_pred = self.model.predict(obs).flatten()
        if self.predict_change:
            state_pred = observation.flatten() + y_pred[:-1]
        else:
            state_pred = y_pred[:-1]
        reward_pred = y_pred[-1]
        return state_pred, reward_pred, self.done(state_pred)

    def reject_index(self, data):
        self.kde.fit(self.X)
        mins = np.min(self.X, axis=0)
        maxs = np.max(self.X, axis=0)
        means = (mins + maxs) / 2
        scales = np.abs(maxs - mins)
        test = np.random.rand(1000, len(scales)) - 0.5
        test *= scales
        test += means
        scores = self.kde.score_samples(test)
        max_populated, min_populated = np.max(scores), np.min(scores)
        mean_populated = (max_populated + min_populated) / 2
        scores = self.kde.score_samples(data)
        cut = 1.1 * max_populated - (1 / (1 + np.exp(
            self.nb_samples / len(self.X) - len(self.X) / self.nb_samples))
        ) * np.abs(max_populated - min_populated)
        return scores < cut
def training_by_different_model(dfnorm, y, name):
    svr = svm.SVR(kernel='linear')
    lr = LinearRegression()
    dt = DecisionTreeRegressor()
    rf = RandomForestRegressor()
    # kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) + WhiteKernel(noise_level=0.5)
    kernel = 50.0 ** 2 * RBF(length_scale=50.0) \
        + 0.5 ** 2 * RationalQuadratic(length_scale=1.0) \
        + WhiteKernel(noise_level=0.1)
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10,
                                  normalize_y=True)
    nn = MLPRegressor(solver='lbfgs', alpha=1e-5,
                      hidden_layer_sizes=hidden_layer_sizes, random_state=1,
                      max_iter=1000, activation='relu',
                      learning_rate_init=0.01, momentum=0.9)

    cv = 3
    n_jobs = 3
    predicted_lr = cross_val_predict(lr, dfnorm, y, cv=cv, n_jobs=n_jobs)
    predicted_svr = cross_val_predict(svr, dfnorm, y, cv=cv, n_jobs=n_jobs)
    predicted_dt = cross_val_predict(dt, dfnorm, y, cv=cv, n_jobs=n_jobs)
    predicted_rf = cross_val_predict(rf, dfnorm, y, cv=cv, n_jobs=n_jobs)
    predicted_gp = cross_val_predict(gp, dfnorm, y, cv=cv, n_jobs=n_jobs)
    predicted_nn = cross_val_predict(nn, dfnorm, y, cv=cv, n_jobs=n_jobs)
    # predicted_nn = 100

    # do not run until the previous step finishes execution; Gaussian process
    # and neural network cross-validation take some time.
    result = cross_validate(gp, dfnorm, y, n_jobs=n_jobs, cv=cv,
                            return_estimator=True)
    for i, score in enumerate(result["test_score"]):
        if score == max(result["test_score"]):
            gp = result["estimator"][i]
    print(gp)
    joblib.dump(gp, 'models/gp_{}.model'.format(name))

    print("\tLR\tSVR\tDT\tRF\tNN\tGP")
    cv_lr = mean_absolute_error(y, predicted_lr)
    cv_svr = mean_absolute_error(y, predicted_svr)
    cv_dt = mean_absolute_error(y, predicted_dt)
    cv_rf = mean_absolute_error(y, predicted_rf)
    cv_nn = mean_absolute_error(y, predicted_nn)
    cv_gp = mean_absolute_error(y, predicted_gp)
    print("mae\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f"
          % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))

    cv_lr = mean_absolute_percentage_error(y, predicted_lr)
    cv_svr = mean_absolute_percentage_error(y, predicted_svr)
    cv_dt = mean_absolute_percentage_error(y, predicted_dt)
    cv_rf = mean_absolute_percentage_error(y, predicted_rf)
    cv_nn = mean_absolute_percentage_error(y, predicted_nn)
    cv_gp = mean_absolute_percentage_error(y, predicted_gp)
    ## cv_gp = 0
    print("mape\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f"
          % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))

    cv_lr = sqrt(mean_squared_error(y, predicted_lr))
    cv_svr = sqrt(mean_squared_error(y, predicted_svr))
    cv_dt = sqrt(mean_squared_error(y, predicted_dt))
    cv_rf = sqrt(mean_squared_error(y, predicted_rf))
    cv_nn = sqrt(mean_squared_error(y, predicted_nn))
    cv_gp = sqrt(mean_squared_error(y, predicted_gp))
    # cv_gp = 0
    print("rmse\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f"
          % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))

    print("r2\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f"
          % (r2_score(y, predicted_lr), r2_score(y, predicted_svr),
             r2_score(y, predicted_dt), r2_score(y, predicted_rf),
             r2_score(y, predicted_nn), r2_score(y, predicted_gp)))
    # print("r2\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (adj_r2_score(p, y, predicted_lr),
    #       r2_score(y, predicted_svr), r2_score(y, predicted_dt),
    #       r2_score(y, predicted_rf), r2_score(y, predicted_nn)))
    return

    # plotting below is disabled by the early return above
    fig, ax = plt.subplots()
    # plt.title('Cross-validated predictions of 95th percentile latency (ms)')
    ax.scatter(y, predicted_gp, edgecolors=(0, 0, 0))
    ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax.set_xlabel('Measured tail latency (ms)')
    ax.set_ylabel('Predicted tail latency (ms)')
    # ax.set_xlim(50, 500)
    # ax.set_ylim(50, 500)
    plt.grid(True)
    # plt.xticks(np.arange(0, 501, step=100))
    # plt.yticks(np.arange(0, 501, step=100))
    plt.tight_layout()
    plt.show()
    return

    fig, ax = plt.subplots()
    # plt.title('Cross-validated predictions of 95th percentile latency (ms)')
    ax.scatter(y, predicted_nn, edgecolors=(0, 0, 0))
    ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax.set_xlabel('Measured tail latency (ms)')
    ax.set_ylabel('Predicted tail latency (ms)')
    # ax.set_xlim(50, 500)
    # ax.set_ylim(50, 500)
    plt.grid(True)
    # plt.xticks(np.arange(0, 501, step=100))
    # plt.yticks(np.arange(0, 501, step=100))
    plt.tight_layout()
    plt.show()
print(X.shape)

_t1b = tm.time()

# Transform data
X = StandardScaler().fit_transform(X)
# y_scaler = StandardScaler().fit(y)
# y = y_scaler.transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                    random_state=42)

# Prepare the model
clf = MLPRegressor(hidden_layer_sizes=(128, 128, 64),
                   activation="tanh",
                   solver="adam",
                   verbose=True,
                   tol=1.0e-10,
                   early_stopping=True)

"""
font = {'family' : 'Bitstream Vera Sans',
        'weight' : 'normal',
        'size'   : 9}
plt.rc('font', **font)

fig, axes = plt.subplots(nrows=1, ncols=1)
axes.set_title("Data: " + file)
axes.set_ylabel('Normalized distant count')
axes.set_xlabel('Distance ($\AA$)')
axes.hist(y_train, 150, color='blue', normed=True, label='plot',
          linewidth=2, alpha=1.0)
def inBuilt(inp_dat, out_dat, inp_start, inp_end, inc, out_max):
    inp = []
    out = []
    for n in inp_dat:
        inp.append([int(n[0] * 10)])
    for n in out_dat:
        out.append(int(n * 100))

    classifiers = [
        ("LINEAR: ", linear_model.LinearRegression()),
        ('LOG-LBFGS: ', LogisticRegression(solver='lbfgs', max_iter=2000)),
        ('LOG-NEWTON: ', LogisticRegression(solver='newton-cg', max_iter=2000)),
        ('MLPCLAS-ADAM: ', MLPClassifier(solver='adam', max_iter=5000)),
        ('SGDREG: ', MLPRegressor(solver='lbfgs', max_iter=2000)),
    ]
    clas = [
        ('SVC', LinearSVC(max_iter=2000)),
    ]

    for name, clf in classifiers:
        print(' ')
        clf.fit(inp, out)
        print(name, ': ')
        inp2 = []
        out2 = []
        x1 = np.arange(inp_start * 10, inp_end * 10, inc * 10)
        x = x1.reshape(-1, 1).tolist()
        for i in x:
            inp2.append(i[0] / 10)
            y = clf.predict([[i[0]]]) / 100 + 4  # predict expects a 2-D input
            # for j in range(len(y)):
            #     y[j] *= out_max
            out2.append(list(y))
        # print(time.time() - start)

        plt.plot(np.asarray(inp2), np.asarray(out2), 'b--',
                 np.asarray(inp2), cpw_plot.f2(np.asarray(inp2), 6), 'go')
        plt.ylabel('Impedance')
        plt.xlabel('a/b')
        plt.title('cpw using ' + name + ' b/h=0.1')
        plt.show()

        errorplot = abs(np.asarray(out2).T - cpw_plot.f2(np.asarray(inp2), 6)) \
            / cpw_plot.f2(np.asarray(inp2), 6) * 100
        max_error = max(errorplot[0])
        print("max error: ", max_error)
        avg_error = 0
        for i in errorplot[0]:
            avg_error += i
        avg_error = avg_error / len(errorplot[0])
        print("average error: ", avg_error)

        plt.plot(np.asarray(inp2), errorplot[0], 'r--')
        plt.ylabel('absolute error')
        plt.xlabel('a/b')
        plt.title('%error using ' + name)
        plt.show()

        x = [0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55,
             0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95]
        for i in x:
            ou = clf.predict([[i * 10]])
            print(ou[0] / 100)
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

if __name__ == '__main__':  # run only when executed directly, not on import
    # List of benchmark algorithms and the models implementing each one,
    # with the arguments for each algorithm specified.
    models = [
        ('SVM', SVC(random_state=1), SVR()),
        ('GaussianProcess', GaussianProcessClassifier(random_state=1),
         GaussianProcessRegressor(normalize_y=True, alpha=1, random_state=1)),
        ('KNeighbors', KNeighborsClassifier(), KNeighborsRegressor()),
        ('MLP', MLPClassifier(random_state=1),
         MLPRegressor(hidden_layer_sizes=(5,), solver='lbfgs', random_state=1)),
    ]

    # Validation dataset files and, for each file, the separator character,
    # the header row position, and the index column position
    classifier_files = ['iris.data', 'sonar.all-data', 'glass.data']
    classifier_params = [(',', None, None), (',', None, None), (',', None, 0)]
    regressor_files = ['airfoil_self_noise.dat', 'winequality-red.csv',
                       'winequality-white.csv']
    regressor_params = [(r'\t', None, None), (';', 0, None), (';', 0, None)]

    # Table storing the evaluation score for each validation file and algorithm
    result = pd.DataFrame(
        columns=['target', 'function'] + [m[0] for m in models],
        index=range(len(classifier_files + regressor_files) * 2))
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST

# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X) ** 2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt) ** 2).mean()
print("Test MSE:", mse)
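# Fitting the regressor with its own input as the target turns it into a
# rough autoencoder. A sketch of recovering the hidden-layer codes from the
# learned weights (assumes the default single hidden layer and relu
# activation; the helper is not part of the original script):
import numpy as np

def hidden_codes(mlp, X):
    """First-hidden-layer activations of a fitted MLPRegressor."""
    Z = X @ mlp.coefs_[0] + mlp.intercepts_[0]
    return np.maximum(Z, 0)  # relu, the default activation

# codes = hidden_codes(model, X)  # lower-dimensional representation of X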
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

# Feature scaling
#-----------------------------------------------------------------------------------------------------------------------
sc = preprocessing.StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
#-----------------------------------------------------------------------------------------------------------------------

mlp_reg = MLPRegressor(hidden_layer_sizes=[12, 12, 12],
                       max_iter=100,
                       activation='relu',  # 'identity', 'logistic', 'tanh', 'relu'
                       learning_rate_init=0.001,
                       solver='lbfgs',  # lbfgs, sgd, adam
                       random_state=6)
model_nn = mlp_reg.fit(X_train, y_train)
y_pred = model_nn.predict(X_test)

MSE = metrics.mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
MAE = metrics.mean_absolute_error(y_test, y_pred)
ACC = mlp_reg.score(X_test, y_test)

# MAPE
#-----------------------------------------------------------------------------------------------------------------------
class Ann:
    '''
    Implementation and interface of the presented ANN functionality
    '''

    def __init__(self):
        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False,
                                warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        # TD-lambda parameter
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        '''
        Returns the network's evaluation for the input
        '''
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):
        '''
        Adds the game data to the training examples
        '''
        # Present the game from the end backwards
        tableros.reverse()
        for i in range(len(tableros)):
            # Board representation, estimated value
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:
                # For the final position, use the game result as the expected output
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                # The value to learn, given by TD-lambda
                valorAAprender = valorEstimado + self.lambdaCoefficient * (
                    self._salidas_esperadas_entrenamiento[i - 1] - valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        '''
        Run training on the stored examples
        '''
        self._nn.partial_fit(self._entradas_entrenamiento,
                             self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        '''
        Serialize and persist the network
        '''
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        '''
        Deserialize the network, or create a new one
        '''
        self.path = path
        if os.path.isfile(path):
            # If the specified file exists, deserialize the network
            self._nn = pickle.load(open(path, 'rb'))
        else:
            # Otherwise, initialize the given network
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
# Multilayer Perceptron
# General Example
# Notes say that we should make sure to normalize this data before using for ANN

from sklearn.neural_network import MLPRegressor

mlp_reg = MLPRegressor(
    hidden_layer_sizes=10,  # tuple whose length equals the number of hidden
                            # layers and whose entries give the number of
                            # neurons in each layer
    activation='relu',      # can be identity, logistic, tanh, relu
    solver='adam',          # lbfgs, sgd, adam
    learning_rate='constant',  # better to be fixed as 'constant'
    learning_rate_init=0.01,   # controls the step size in updating weights
    max_iter=1000,          # maximum number of iterations to stop
    tol=0.0001)             # tolerance for optimization

mlp_reg.fit(X, y)
mlp_reg.predict(X_dash)

#######################
# Example with sine data
#######################
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(0.0, 1, 0.01)  # 100-element array (note that the output doesn't have the correct shape yet)
x = x.reshape(-1, 1)         # reshaping it to 100 by 1
def get_stacking_model():
    model = MLPRegressor(hidden_layer_sizes=(20, 20))
    X_train, y_train, _, _ = get_data()
    model.fit(X_train, y_train)
    return model
# 0.3600836547592847
# print(reg.coef_)
from sklearn.metrics import mean_squared_error
score = mean_squared_error(y_test, reg.predict(X_test))
print("linear regression: ", score)

#############################################################
#####################################################################
from sklearn.neural_network import MLPRegressor

nn = MLPRegressor(hidden_layer_sizes=(10,), activation='relu', solver='adam',
                  alpha=0.001, batch_size='auto', learning_rate='constant',
                  learning_rate_init=0.01, power_t=0.5, max_iter=1000,
                  shuffle=True, random_state=9, tol=0.0001, verbose=False,
                  warm_start=False, momentum=0.9, nesterovs_momentum=True,
                  early_stopping=False, validation_fraction=0.1, beta_1=0.9,
                  beta_2=0.999, epsilon=1e-08)

n = nn.fit(X_train, y_train)
# y_pred = nn.predict(X_train)
scr = nn.score(X_test, y_test)
# print(scr)
score = mean_squared_error(y_test, nn.predict(X_test))
print("MLPregressor: ", score)
# 0.3702685509025747

##########################################################################
##########################################
from sklearn.svm import SVR
from scipy.optimize import curve_fit

DEBUG = False
fitted = False
nn_threshold = 0.8  # threshold for updating the learner
metricsNN = []      # metrics for Neural Networks [U,A,Qu,Q,Rt]
coefNN = []         # coefficients for Neural Networks [c1,c2,c3,c4]
metrics = []        # for nonlinear regression [U,A,Qu,Q]
respTime = []       # for nonlinear regression
monitors = []
currentRsquare = 0.0  # current best value for R-squared
currentCoefficients = [0.1, 1, 0.001, -0.8]  # current best values for coefficients [c1,c2,c3,c4]
defaultCoefficients = [0.1, 1, 0.001, -0.8]
historic = {'c1': [], 'c2': [], 'c3': [], 'c4': [], 'rsquared': []}

clf = MLPRegressor(solver='lbfgs', alpha=1e-5, random_state=1,
                   activation='tanh', hidden_layer_sizes=(100, 5),
                   learning_rate='adaptive')

frontpage = """<html>
<head></head>
<meta http-equiv="refresh" content="40">
<body>
<form method="get" action="addMonitoringData">
  <input type="text" value="" name="name"/>
  <button type="submit">Add a parameter</button>
</form>
<table style=\"width:100%\">
<caption>Monitoring Data</caption>
<tr><td>Normalized Response Time</td><td>Guiltiness</td><td>[U,A,Qu,Q]</td></tr>
"""

def adjust_coeff(U, A, Qu, Q, Rt):
for b in range(len(training_x)):  # number of bootstrapped samples
    Xtrain = training_x[b]
    ytrain = training_y[b]
    Xtest = test_x[b]
    predictions_recall = []
    c = 1
    for p in [20]:
        for q in [15]:
            reg = MLPRegressor(alpha=1e-4, hidden_layer_sizes=(p, q),
                               random_state=1, activation="tanh",
                               batch_size=64, max_iter=500)
            for i in range(21):
                predictions = []
                reg.fit(Xtrain, ytrain[:, i])
                for j in range(len(Xtest)):
                    pred_y_test = reg.predict(Xtest[j].reshape(1, -1))
                    predictions.append(pred_y_test)
                prediction = np.array(predictions).reshape(-1, 1)
                predictions_recall.append(prediction)
            c = c + 1
def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # The coefficients will be identical if both do or do not shuffle
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)
        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will be slightly different if shuffle=True
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)
    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from models import models_util

dataset = models_util.load_metadata_dataset()

# Difficulty and BPM input features.
X_train = dataset[:, 0:2]
y = dataset[:, 5]

model = make_pipeline(StandardScaler(),
                      MLPRegressor(hidden_layer_sizes=(10,)))
model.fit(X_train, y)

models_util.save_model(model, models_util.MetadataPredictor.ACCURACY)
print("Trained model saved.")
from datetime import datetime
startTime = datetime.now()

fileTrain = open("fingerDataTrain.dat", 'r')
fileVal = open("fingerDataVal.dat", 'r')
trainingSet = np.loadtxt(fileTrain)
valSet = np.loadtxt(fileVal)
fileTrain.close()
fileVal.close()

trainX = trainingSet[:, :13]
trainY = trainingSet[:, 14:]
valX = valSet[:, :13]
valY = valSet[:, 14:]

# standardize each feature with the training-set mean and std
for i in range(trainX.shape[1]):
    m = trainX[:, i].mean()
    s = trainX[:, i].std()
    trainX[:, i] = (trainX[:, i] - m) / s
    valX[:, i] = (valX[:, i] - m) / s

ann = MLPRegressor()
ann.fit(trainX, trainY)
sqError = ((ann.predict(valX) - valY) ** 2).mean()

plt.scatter(valX[:, 1], valY[:, 3], color='black')
plt.plot(valX[:, 1], ann.predict(valX)[:, 3], color='blue', linewidth=3)

print(datetime.now() - startTime)
def initialize_model(self, total_num_actions: int, start_features):
    model = MLPRegressor(hidden_layer_sizes=(1024,), learning_rate="constant")
    # one partial_fit call initializes the network to the right output width
    model.partial_fit(self._features_to_model_input(start_features),
                      np.zeros((1, total_num_actions)))
    self.model = model
dataframes = [
    demand_weather_14_17.drop(columns=['Date']),
    demand_weather_14_16.drop(columns=['Date']),
    demand_weather_17.drop(columns=['Date'])
]
run_sim_datasets(dataframes, plot=False)

print('\n\n---- Model Evaluation ----')
evaluate_models(demand_weather_14_16, demand_weather_17)

print('\n\n---- Run the Simulator ----')
# MLP and Linear models performed rather well in evaluate_models, so we test
# them in the simulation using the parameters gained from the model evaluation
mlp = MLPRegressor(alpha=1e-6, hidden_layer_sizes=[10], random_state=0,
                   solver='lbfgs', max_iter=1000000)
lr = LinearRegression()
lasso = Lasso(alpha=0.0001)
ridge = Ridge(alpha=1e-7)
svr = LinearSVR(C=100)
models = [lr, lasso, ridge, svr, mlp]
for model in models:
    run_sim_ml_dataset(model, demand_weather_14_17, plot=False)

# =============================================================================
# With ss=1.1, MLP and Lasso score best. Therefore, I would use Lasso
# regression, which has most feature coefficients at 0 and is easily
# explainable.
# =============================================================================
# In[79]:

print("Gradient Boosting Feature Importances")
headers = ["name", "score"]
values = sorted(zip(x_train.columns, m3.feature_importances_),
                key=lambda x: x[1] * -1)
print(tabulate(values, headers, tablefmt="plain"))

# In[54]:

from sklearn.neural_network import MLPRegressor
m2 = MLPRegressor(hidden_layer_sizes=(128, 11), learning_rate_init=0.01,
                  verbose=True, max_iter=1000)
m2.fit(x_train, y_train)

# In[42]:

score = r2_score(y_test, m2.predict(x_test))
print(score)

# In[43]:

sub = pd.DataFrame()
sub['Id'] = test_df['Id']
y3_p = model.predict(X_plot)
'''
model.fit(X4, y4)
y4_p = model.predict(X_plot)

model.fit(X5, y5)
y5_p = model.predict(X_plot)
'''
y6_p = shift(y1_p, -30, cval=0)
y7_p = shift(y1_p, -15, cval=0)

X = np.column_stack([X_plot, y1_p, y2_p, y3_p, y6_p, y7_p])
y = shift(y1_p, 30, cval=0)

# poly = make_pipeline(PolynomialFeatures(3), Ridge())
mpl = MLPRegressor(beta_1=0.99)
'''
y_t = y[-1000:-2]
y = y[0:-1000]

X_t = X[-1000:-2]
X = X[0:-1000]

mpl.fit(X, y)
poly.fit(X, y)

mpl_pred = mpl.predict(X_t)
poly_pred = poly.predict(X_t)
'''
mpl_pred = cross_val_predict(mpl, X, y, cv=10)
# poly_pred = cross_val_predict(poly, X, y, cv=10)
# nn_pred = cross_val_predict(model, X, y, cv=10)

print(mpl.get_params())
("regressor", regressor) ]) pipeline.fit(housing_X, housing_y) customize(regressor, **kwargs) store_pkl(pipeline, name + ".pkl") medv = DataFrame(pipeline.predict(housing_X), columns = ["MEDV"]) if(with_kneighbors == True): Xt = pipeline_transform(pipeline, housing_X) kneighbors = regressor.kneighbors(Xt) medv_ids = DataFrame(kneighbors[1] + 1, columns = ["neighbor(" + str(x + 1) + ")" for x in range(regressor.n_neighbors)]) medv = pandas.concat((medv, medv_ids), axis = 1) store_csv(medv, name + ".csv") build_housing(AdaBoostRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 17), "AdaBoostHousing") build_housing(KNeighborsRegressor(), "KNNHousing", with_kneighbors = True) build_housing(MLPRegressor(activation = "tanh", hidden_layer_sizes = (26,), solver = "lbfgs", random_state = 13, tol = 0.001, max_iter = 1000), "MLPHousing") build_housing(SGDRegressor(random_state = 13), "SGDHousing") build_housing(SVR(), "SVRHousing") build_housing(LinearSVR(random_state = 13), "LinearSVRHousing") build_housing(NuSVR(), "NuSVRHousing") # # Anomaly detection # def build_iforest_housing_anomaly(iforest, name, **kwargs): mapper = DataFrameMapper([ (housing_X.columns.values, ContinuousDomain()) ]) pipeline = PMMLPipeline([ ("mapper", mapper),
training_data_input = ss_x.fit_transform(training_data_input)  # estimate the mean and standard deviation of each feature on the training set
test_data_input = ss_x.transform(test_data_input)  # note: scale the test set with the same parameters so it stays comparable with the training set

ss_y = preprocessing.StandardScaler()
training_data_output = ss_y.fit_transform(training_data_output)
test_data_output = ss_y.transform(test_data_output)  # reshape test_y_disorder into a single column

n_folds = 6  # number of cross-validation folds
model_br = BayesianRidge()  # Bayesian ridge regression model
model_lr = LinearRegression()  # ordinary linear regression model
model_etc = ElasticNet()  # elastic net regression model
model_svr = SVR()  # support vector regression model
model_gbr = GradientBoostingRegressor()  # gradient boosting regression model
model_mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=(20, 20, 20), random_state=1)
model_names = ['BayesianRidge', 'LinearRegression', 'ElasticNet', 'SVR', 'GBR', 'MLP']  # names of the models
model_dic = [model_br, model_lr, model_etc, model_svr, model_gbr, model_mlp]  # collection of regression model objects

cv_score_list = []  # cross-validation results
pre_y_list = []  # predicted y values from each regression model
for model in model_dic:  # iterate over the regression models
    # train and validate each model with cross-validation
    scores = cross_val_score(model, training_data_input, training_data_output.ravel(), cv=n_folds)
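# A minimal sketch of the model-comparison loop above, completed on synthetic
# data: collect each model's mean CV score and print it. Only a subset of the
# models is instantiated here; the real training data is not reused.
from sklearn.datasets import make_regression
from sklearn.linear_model import BayesianRidge, LinearRegression, ElasticNet
from sklearn.model_selection import cross_val_score
from sklearn.neural_network import MLPRegressor

X, y = make_regression(n_samples=300, n_features=10, noise=10.0, random_state=1)
models = {'BayesianRidge': BayesianRidge(),
          'LinearRegression': LinearRegression(),
          'ElasticNet': ElasticNet(),
          'MLP': MLPRegressor(solver='lbfgs', hidden_layer_sizes=(20, 20, 20),
                              random_state=1, max_iter=2000)}
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=6)
    print('%-18s mean CV R^2 = %.3f' % (name, scores.mean()))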
class QN(object):
    def __init__(self, num_inputs, num_outputs):
        self.nx = num_inputs
        self.ny = num_outputs
        # note: older sklearn releases called the `solver` parameter `algorithm`
        self.net = MLPRegressor(hidden_layer_sizes=(50, 10),
                                max_iter=1,
                                solver='sgd',
                                learning_rate='constant',
                                learning_rate_init=0.001,
                                warm_start=True,
                                momentum=0.9,
                                nesterovs_momentum=True)
        self.initialize_network()

        # set up experience replay
        self.mbsize = 128  # mini-batch size
        self.er_s = []
        self.er_a = []
        self.er_r = []
        self.er_done = []
        self.er_sp = []

        self.er_size = 2000  # total size of the replay memory, implemented as a circular buffer
        self.whead = 0  # write head

    def initialize_network(self):
        # initialize the network weights with a dummy fit
        xtrain = np.random.rand(256, self.nx)
        ytrain = 10 + np.random.rand(256, self.ny)
        self.net.fit(xtrain, ytrain)

    def update_network(self):
        # update the network on a mini-batch sampled from the experience replay
        # prepare training data
        chosen = list(np.random.randint(len(self.er_s),
                                        size=min(len(self.er_s), self.mbsize)))
        Xtrain = np.asarray([self.er_s[i] for i in chosen])

        # calculate targets
        target = np.random.rand(len(chosen), self.ny)
        for j, i in enumerate(chosen):
            # do a forward pass through s and sp
            Q_s = self.net.predict(self.er_s[i].reshape(1, -1))
            Q_sp = self.net.predict(self.er_sp[i].reshape(1, -1))
            target[j, :] = Q_s  # target initialized to the current prediction
            if self.er_done[i]:
                target[j, self.er_a[i]] = self.er_r[i]  # at end of episode, the target is the terminal reward
            else:
                # predict on a 2-D input returns a 2-D array, hence the max over all of Q_sp
                target[j, self.er_a[i]] = self.er_r[i] + 0.9 * np.max(Q_sp)

        # fit the network: a single step of SGD thanks to max_iter=1 and warm_start=True
        self.net.fit(Xtrain, target)

    def append_memory(self, s, a, r, sp, done):
        if len(self.er_s) < self.er_size:
            self.er_s.append(s)
            self.er_a.append(a)
            self.er_r.append(r)
            self.er_sp.append(sp)
            self.er_done.append(done)
        else:
            self.er_s[self.whead] = s
            self.er_a[self.whead] = a
            self.er_r[self.whead] = r
            self.er_sp[self.whead] = sp
            self.er_done[self.whead] = done
        self.whead = (self.whead + 1) % self.er_size
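# A hedged sketch of how a QN instance like the one above could pick actions:
# epsilon-greedy over the network's predicted Q-values. `select_action` is a
# hypothetical helper, not part of the original class.
import numpy as np

def select_action(qn, state, epsilon=0.1):
    if np.random.rand() < epsilon:
        return np.random.randint(qn.ny)  # explore: random action
    q_values = qn.net.predict(state.reshape(1, -1))[0]  # shape (ny,)
    return int(np.argmax(q_values))  # exploit: best predicted action

# usage: qn = QN(num_inputs=4, num_outputs=2); a = select_action(qn, np.zeros(4))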
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error

data = pd.read_csv('network_backup_dataset.csv')
train = data.loc[:, ['WeekNumber', 'DayofWeek', 'BackupStartTime', 'WorkFlowID', 'FileName', 'BackupTime']]
target = data.loc[:, ['SizeofBackup']]

# note: older sklearn releases called the `solver` parameter `algorithm`
mlp = MLPRegressor(solver='sgd', hidden_layer_sizes=150, max_iter=200, shuffle=False, random_state=1)
mlp.fit(train, target.values.ravel())
prediction = mlp.predict(train)

plt.plot(prediction, label='Prediction', color='red')
plt.plot(target, label='Real Data', color='blue')
plt.title('Copy Size versus Time based on Neural Network Regression')
plt.xlabel('Time')
plt.ylabel('Copy Size')
plt.legend()
plt.show()

rmse = mean_squared_error(target.SizeofBackup, prediction) ** 0.5
print(rmse)
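# Columns like DayofWeek, WorkFlowID, and FileName above are categorical codes;
# feeding them to an MLP as plain integers imposes an arbitrary ordering. A
# minimal sketch of one-hot encoding such columns first, on a synthetic
# stand-in dataframe (not the backup dataset).
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import OneHotEncoder

df = pd.DataFrame({'DayofWeek': np.random.randint(0, 7, 200),
                   'WorkFlowID': np.random.randint(0, 5, 200)})
y = np.random.rand(200)
X = OneHotEncoder().fit_transform(df).toarray()  # one indicator column per category
mlp = MLPRegressor(solver='sgd', hidden_layer_sizes=150, max_iter=500,
                   random_state=1).fit(X, y)
print(mlp.score(X, y))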
# KNN
from sklearn.neighbors import KNeighborsRegressor
KNN = KNeighborsRegressor()
knn_param_grid = {'n_neighbors': [3, 10]}
knn_grid = model_selection.GridSearchCV(KNN, knn_param_grid, cv=10, n_jobs=25, verbose=1,
                                        scoring='neg_mean_squared_error')
knn_grid.fit(X_train, y_train)
print('Best Params:' + str(knn_grid.best_params_))
KNN = KNeighborsRegressor(n_neighbors=10)
KNN.fit(X_train, y_train)
y_predict_knn = KNN.predict(X_test)
mae_knn = (np.abs(y_predict_knn - y_test)).sum() / 9467  # 9467 = test-set size
joblib.dump(KNN, 'KNN.model')
print(mae_knn)

# MLP
from sklearn.neural_network import MLPRegressor
MLP = MLPRegressor(hidden_layer_sizes=(300, 200, 200), max_iter=100, activation='relu')
MLP.fit(X_train, y_train)
y_predict_MLP = MLP.predict(X_test)
mae_MLP = (np.abs(y_predict_MLP - y_test)).sum() / 9467
joblib.dump(MLP, 'MLP.model')
print(mae_MLP)

# XGBoost
import xgboost as xgb
x_regress = xgb.XGBRegressor(max_depth=20, n_estimators=5000)
x_regress_param_grid = {'max_depth': [5, 20]}
x_regress_grid = model_selection.GridSearchCV(x_regress, x_regress_param_grid, cv=10, n_jobs=25,
                                              verbose=1, scoring='neg_mean_squared_error')
x_regress_grid.fit(X_train, y_train)  # fit the grid search so the defined grid is actually used
joblib.dump(x_regress_grid, 'x_regress_grid.model')
y_predict_xgb = x_regress_grid.predict(X_test)
mae_xgb = (np.abs(y_predict_xgb - y_test)).sum() / 9467
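# The MAE above is computed with a hardcoded test-set size (9467); sklearn's
# metric avoids the magic number. A minimal sketch on synthetic arrays (the
# y_test/y_predict arrays above are not reused here).
import numpy as np
from sklearn.metrics import mean_absolute_error

y_true = np.random.rand(100)
y_pred = y_true + np.random.normal(scale=0.1, size=100)
print(mean_absolute_error(y_true, y_pred))  # equivalent to np.abs(y_pred - y_true).mean()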
print(clf3.score(input.T[384:], targets[384:]))

from sklearn import ensemble
clf4 = ensemble.RandomForestRegressor(random_state=0)
clf4.fit(input.T[0:384], targets[0:384])
print(clf4.score(input.T[384:], targets[384:]))

import matplotlib.pyplot as plt

parameters = {'min_samples_split': [2, 10], 'max_depth': [3, 15], 'n_estimators': [10, 50]}
from sklearn.model_selection import GridSearchCV  # the sklearn.grid_search module was removed in 0.20

###### Step 5 Testing With Neural Networks

from sklearn.neural_network import MLPRegressor
clf5 = MLPRegressor(random_state=0, hidden_layer_sizes=500, activation='logistic', max_iter=500)
clf5.fit(input.T[0:384], targets[0:384])
pred = clf5.predict(input.T[384:])
pred_train = clf5.predict(input.T[0:384])
print(clf5.score(input.T[384:], targets[384:]))
print(clf5.get_params())

### Plotting the results
plt.figure(1)
plt.plot(range(0, len(targets[384:])), pred, 'red',
         range(0, len(targets[384:])), targets[384:], 'blue')
plt.figure(2)
plt.plot(range(0, len(targets[0:384])), pred_train, 'red',
         range(0, len(targets[0:384])), targets[0:384], 'blue')
plt.show()

import joblib  # sklearn.externals.joblib was removed in sklearn 0.23
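# The `parameters` grid above is defined but never run; a minimal sketch of
# feeding it through GridSearchCV on synthetic data (the real input/targets
# arrays are not reused here).
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV

X, y = make_regression(n_samples=200, n_features=6, random_state=0)
parameters = {'min_samples_split': [2, 10], 'max_depth': [3, 15], 'n_estimators': [10, 50]}
gs = GridSearchCV(RandomForestRegressor(random_state=0), parameters, cv=5)
gs.fit(X, y)
print(gs.best_params_, gs.best_score_)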
"Random Forest", "Neural Net", "AdaBoost", # "Naive Bayes", # "QDA", "Gessaman", ] regressors = [ KNeighborsRegressor(3), SVR(kernel="linear", C=0.025), SVR(gamma=2, C=1), # GaussianProcessRegressor(1.0 * RBF(1.0)), DecisionTreeRegressor(max_depth=5), RandomForestRegressor(max_depth=5, n_estimators=20, max_features=1), MLPRegressor(alpha=1), AdaBoostRegressor(), # GaussianNB(), # QuadraticDiscriminantAnalysis(), Gessaman(nb_jobs=-1), ] # X, y = make_classification( # n_features=2, # n_redundant=0, # n_informative=2, # random_state=1, # n_clusters_per_class=1, # n_samples=n_samples, # ) # rng = np.random.RandomState(2)
geno = np.load('genodata.npy')
pheno = np.load('phenodata.npy')

X_tr = geno[:1000, 1:]  # slicing geno
#X_va = geno[201:250, :]
X_te = geno[1001:, 1:]

Y_tr = pheno[:1000, 1:]  # slicing pheno
#Y_va = pheno[201:250, :]
Y_te = pheno[1001:, 1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

# note: older sklearn releases spelled this parameter algorithm='l-bfgs'
reg = MLPRegressor(hidden_layer_sizes=(1,), solver='lbfgs')
reg.fit(X_tr, Y_tr.ravel())

scores = cross_val_score(reg, geno[:, 1:], pheno[:, 1:].ravel(), cv=10)

#Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
#Yte = np.array(Y_te, dtype=np.float64)
r_row, p_score = pearsonr(Result_Y, Y_te.ravel())  # pearsonr expects 1-D arrays

# The mean squared error; ravel the 2-D target column so the difference does not broadcast
print("Residual sum of squares: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test.ravel()) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))
print(Result_Y)
GradientBoostingRegressor(random_state=50), linear_model.HuberRegressor(), KNeighborsRegressor(), KernelRidge(), linear_model.Lars(), linear_model.LarsCV(), linear_model.Lasso(), linear_model.LassoCV(), linear_model.LassoLars(), linear_model.LassoLarsCV(), linear_model.LassoLarsIC(), linear_model.LinearRegression(), LinearSVR(), #linear_model.LogisticRegression(), #linear_model.LogisticRegressionCV(), MLPRegressor(), #linear_model.ModifiedHuber(), #linear_model.MultiTaskElasticNet(), #linear_model.MultiTaskElasticNetCV(), #linear_model.MultiTaskLasso(), #linear_model.MultiTaskLassoCV(), NuSVR(), linear_model.OrthogonalMatchingPursuit(), linear_model.OrthogonalMatchingPursuitCV(), PLSCanonical(), PLSRegression(), linear_model.PassiveAggressiveRegressor(), linear_model.RANSACRegressor(), RadiusNeighborsRegressor(), RandomForestRegressor(), #linear_model.RandomizedLasso(),
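# A minimal sketch of how a regressor list like the one above is typically
# consumed: fit each model and report a held-out R^2 score. Only a few of the
# models are instantiated here, and the data is synthetic.
from sklearn import linear_model
from sklearn.datasets import make_regression
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.svm import LinearSVR

X, y = make_regression(n_samples=300, n_features=8, noise=10.0, random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=1)
regressors = [linear_model.HuberRegressor(), KernelRidge(), linear_model.Lasso(),
              linear_model.LinearRegression(), LinearSVR(), MLPRegressor(max_iter=2000)]
for reg in regressors:
    print(type(reg).__name__, round(reg.fit(X_tr, y_tr).score(X_te, y_te), 3))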
# Example with a Regressor using the scikit-learn library
# example for the XOr gate
from sklearn.neural_network import MLPRegressor

X = [[0., 0.], [0., 1.], [1., 0.], [1., 1.]]  # each one of the entries 00 01 10 11
y = [0, 1, 1, 0]  # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
# for more details
# note: older sklearn releases called the `solver` parameter `algorithm`
reg = MLPRegressor(hidden_layer_sizes=(5,), activation='tanh', solver='sgd', alpha=0.001,
                   learning_rate='constant', max_iter=10000, random_state=None,
                   verbose=False, warm_start=False, momentum=0.8, tol=10e-8,
                   shuffle=False)

reg.fit(X, y)
outp = reg.predict([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])

print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])  # input 1 1 is the fourth entry
print('Score:', reg.score(X, y))
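# A quick sanity check for the XOr fit above: round the continuous outputs and
# compare them with the target table. Assumes `reg`, `X`, and `y` from the
# snippet above; with an unlucky initialization the fit may not converge, in
# which case this prints False.
import numpy as np

rounded = np.round(reg.predict(X))
print('all four gates correct:', np.array_equal(rounded, np.asarray(y, dtype=float)))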
def get_mlp_regressor(num_hidden_units=51): mlp = MLPRegressor(hidden_layer_sizes=num_hidden_units) return [mlp], ['Multi-Layer Perceptron']
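# A minimal usage sketch for the factory above, on synthetic data. The default
# max_iter of MLPRegressor may trigger a convergence warning here; this is
# illustrative only.
from sklearn.datasets import make_regression

models, names = get_mlp_regressor()
X, y = make_regression(n_samples=200, n_features=5, random_state=0)
print(names[0], models[0].fit(X, y).score(X, y))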
def regression(N, P):
    assert len(N) == len(P)
    # note: older sklearn releases called the `solver` parameter `algorithm`
    clf = MLPRegressor(hidden_layer_sizes=(15,), activation='relu', solver='adam',
                       alpha=0.0001)
    clf.fit(N, P)
    return clf
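# A minimal usage sketch for regression() above, on random data; the assert in
# the function guards matched input/target lengths.
import numpy as np

N = np.random.rand(100, 4)
P = np.random.rand(100)
clf = regression(N, P)
print(clf.predict(N[:3]))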