Example #1
def mlp_bench(x_train, y_train, x_test, fh):
    """
    Forecasts using a simple MLP with 6 nodes in the hidden layer

    :param x_train: train input data
    :param y_train: target values for training
    :param x_test: test data
    :param fh: forecasting horizon
    :return:
    """
    y_hat_test = []

    model = MLPRegressor(hidden_layer_sizes=6, activation='identity', solver='adam',
                         max_iter=100, learning_rate='adaptive', learning_rate_init=0.001,
                         random_state=42)
    model.fit(x_train, y_train)

    last_prediction = model.predict(x_test)[0]
    for i in range(0, fh):
        y_hat_test.append(last_prediction)
        x_test[0] = np.roll(x_test[0], -1)
        x_test[0, (len(x_test[0]) - 1)] = last_prediction
        last_prediction = model.predict(x_test)[0]

    return np.asarray(y_hat_test)
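
# Usage sketch for mlp_bench (not from the original source): a toy series is
# cut into sliding windows; the series, window length, and imports below are
# illustrative assumptions.
import numpy as np
from sklearn.neural_network import MLPRegressor

series = np.sin(np.linspace(0, 20, 200))
window = 8
X = np.array([series[i:i + window] for i in range(len(series) - window)])
y = series[window:]
x_train, y_train = X[:-1], y[:-1]
x_test = X[-1:].copy()  # the last window seeds the recursive forecast
print(mlp_bench(x_train, y_train, x_test, fh=12))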
Example #2
def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
Example #3
def _create_first_population(self):
    self._current_population = []
    for _ in range(self._n_individuals):
        mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture, alpha=10**-10, max_iter=1)
        mlp.fit([np.random.randn(self._n_features)], [np.random.randn(self._n_actions)])
        mlp.out_activation_ = 'softmax'
        self._current_population.append([mlp, 0])
Example #4
def construct_train(train_length, **kwargs):
    """
    Train and test model with given input
    window and number of neurons in layer
    """
    start_cur_position = 0
    steps, steplen = observations.size // (2 * train_length), train_length

    if 'hidden_layer' in kwargs:
        network = MLPRegressor(hidden_layer_sizes=kwargs['hidden_layer'])
    else:
        network = MLPRegressor()

    quality = []

    # fit model - configure parameters
    network.fit(observations[start_cur_position:train_length][:, 1].reshape(1, train_length),
                observations[:, 1][start_cur_position:train_length].reshape(1, train_length))

    parts = []

    # calculate predicted values
    # for each step add all predicted values to a list
    # TODO: add some parallelism here
    for i in range(0, steps):
        parts.append(network.predict(observations[start_cur_position:train_length][:, 1]))
        start_cur_position += steplen
        train_length += steplen

    # estimate model quality using mean squared error
    result = np.array(parts).flatten().tolist()
    for valnum, value in enumerate(result):
        quality.append((value - observations[valnum][1])**2)

    return sum(quality)/len(quality)
Example #5
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        assert_greater(mlp.score(X, y), 0.95)
Example #6
def GetOptimalCLF2(train_x, train_y, rand_starts=8):
    '''
    Gets the optimal CLF function based on fixed settings
    
    Parameters
    ------------------------
    train_x - np.array
        Training feature vectors
    train_y - np.array
        Training label vectors
    rand_starts - int
        Number of random starts to do
        Default - 8 for 95% confidence and best 30%
    
    Returns
    ------------------------
    max_clf - sklearn function
        Optimal trained artificial neural network
    '''
    
    #### Get number of feature inputs of training vector
    n_input = train_x.shape[1]
    
    #### Set initial loss value
    min_loss = 1e10
    
    #### Perform number of trainings according to random start set
    for i in range(rand_starts):
        
        #### Print current status
        print("Iteration number {}".format(i + 1))
        
        #### Initialize ANN network
        clf = MLPRegressor(hidden_layer_sizes=(int(round(2 * np.sqrt(n_input), 0)), 1), activation='logistic', solver='sgd',
                           learning_rate='adaptive', max_iter=100000000, tol=1e-10,
                           early_stopping=True, validation_fraction=1/3.)
        
        #### Fit data
        clf.fit(train_x,train_y)
        
        #### Get current loss
        cur_loss = clf.loss_
        
        #### Save current clf if loss is minimum
        if cur_loss < min_loss:
            
            #### Set min_loss to a new value
            min_loss = cur_loss
            
            #### Set max_clf to new value
            max_clf = clf
    
    return max_clf
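
# Usage sketch for GetOptimalCLF2 with synthetic data (shapes, sizes, and the
# imports are illustrative assumptions, not from the original source):
import numpy as np
from sklearn.neural_network import MLPRegressor

train_x = np.random.randn(200, 5)
train_y = np.random.randn(200)
best = GetOptimalCLF2(train_x, train_y, rand_starts=3)
print("best loss:", best.loss_)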
Example #7
def MLP_Regressor(train_x, train_y):

    clf = MLPRegressor(alpha=1e-05,
                       batch_size='auto', beta_1=0.9, beta_2=0.999, early_stopping=False,
                       epsilon=1e-08, hidden_layer_sizes=(8, 8), learning_rate='constant',
                       learning_rate_init=0.01, max_iter=500, momentum=0.9,
                       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
                       tol=0.0001, validation_fraction=0.1, verbose=False,
                       warm_start=False)
    clf.fit(train_x, train_y)
    # accuracy_score is for classifiers; for a regressor use R^2 via clf.score:
    # score = clf.score(train_x, train_y)
    # print(score)
    return clf
Example #8
def test_lbfgs_regression():
    # Test lbfgs on the boston dataset, a regression problem.
    X = Xboston
    y = yboston
    for activation in ACTIVATION_TYPES:
        mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50,
                           max_iter=150, shuffle=True, random_state=1,
                           activation=activation)
        mlp.fit(X, y)
        if activation == 'identity':
            assert_greater(mlp.score(X, y), 0.84)
        else:
            # Non linear models perform much better than linear bottleneck:
            assert_greater(mlp.score(X, y), 0.95)
Example #9
def train_model(x_train, y_train, alpha=1e-3, hid_layers=[512], max_iter=100):
    """
    Train model on training data.
    :param x_train: training examples
    :param y_train: target variables
    :param alpha: L2 regularization coefficient
    :param hid_layers: hidden layer sizes
    :param max_iter: maximum number of iterations in L-BFGS optimization
    :return: a trained neural network model
    """
    nn_model = MLPRegressor(solver='lbfgs', hidden_layer_sizes=hid_layers,
                            alpha=alpha, max_iter=max_iter, 
                            activation="relu", random_state=1)
    nn_model.fit(x_train, y_train)
    
    return nn_model
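
# Usage sketch for train_model with synthetic data (shapes and the imports are
# illustrative assumptions, not from the original source):
import numpy as np
from sklearn.neural_network import MLPRegressor

x = np.random.randn(100, 8)
y = np.random.randn(100)
model = train_model(x, y, hid_layers=[32], max_iter=200)
print("train R^2:", model.score(x, y))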
Example #10
    def __init__(self, num_inputs, num_outputs):
        self.nx = num_inputs
        self.ny = num_outputs
        self.net = MLPRegressor(hidden_layer_sizes=(50, 10),
                                max_iter=1,
                                solver='sgd',
                                learning_rate='constant',
                                learning_rate_init=0.001,
                                warm_start=True,
                                momentum=0.9,
                                nesterovs_momentum=True
                                )

        self.initialize_network()

        # set experience replay
        self.mbsize = 128 # mini-batch size
        self.er_s = []
        self.er_a = []
        self.er_r = []
        self.er_done = []
        self.er_sp = []

        self.er_size = 2000  # total size of mb, implement as queue
        self.whead = 0  # write head
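
    # Hedged sketch (not in the original source): one way the replay buffers
    # above might be filled, treating them as a ring buffer of size er_size
    # with whead as the write head; the method name is an assumption.
    def store_transition(self, s, a, r, done, sp):
        bufs = (self.er_s, self.er_a, self.er_r, self.er_done, self.er_sp)
        for buf, item in zip(bufs, (s, a, r, done, sp)):
            if len(buf) < self.er_size:
                buf.append(item)
            else:
                buf[self.whead % self.er_size] = item  # overwrite the oldest entry
        self.whead += 1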
Example #11
    def __init__(self):

        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False, warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        # TD-lambda parameter
        self.lambdaCoefficient = 0.9
Example #12
def train(self):
    print("DEB Training with TSnew")
    self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
                            beta_2=0.999, early_stopping=False, epsilon=1e-08,
                            hidden_layer_sizes=len(self.TSnew_Y.columns), learning_rate='constant',
                            learning_rate_init=0.001, max_iter=200, momentum=0.9,
                            nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
                            solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                            warm_start=False)
    self.MLP.fit(self.TSnew_X, self.TSnew_Y)
Example #13
class Ann:

    def __init__(self):

        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False, warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):

        tableros.reverse()
        for i in range(len(tableros)):
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0 or True:  # 'or True' forces this branch, so the TD-lambda update below never runs
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                valorAAprender = valorEstimado + self.lambdaCoefficient * (self._salidas_esperadas_entrenamiento[i-1] -
                    valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        self._nn.partial_fit(self._entradas_entrenamiento, self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        pickle.dump(self._nn, open(self.path,'wb'))

    def cargar(self, path, red):
        self.path = path
        if os.path.isfile(path):
            self._nn = pickle.load(open(path, 'rb'))
        else:
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
Example #14
def _create_new_nn(self, weights, biases):
    mlp = MLPRegressor(hidden_layer_sizes=self._nn_architecture, alpha=10**-10, max_iter=1)
    mlp.fit([np.random.randn(self._n_features)], [np.random.randn(self._n_actions)])
    mlp.coefs_ = weights
    mlp.intercepts_ = biases
    mlp.out_activation_ = 'softmax'
    return mlp
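
# A sketch of the pattern above (not from the original source): the one-sample,
# one-iteration fit exists only so scikit-learn allocates coefs_/intercepts_,
# which the evolutionary code then overwrites; setting out_activation_ to
# 'softmax' makes predict() return normalized action probabilities. Sizes are
# illustrative.
import numpy as np
from sklearn.neural_network import MLPRegressor

proto = MLPRegressor(hidden_layer_sizes=(4,), max_iter=1)
proto.fit([np.random.randn(3)], [np.random.randn(2)])
print([w.shape for w in proto.coefs_])  # [(3, 4), (4, 2)]
proto.coefs_ = [np.random.randn(*w.shape) for w in proto.coefs_]
proto.intercepts_ = [np.random.randn(*b.shape) for b in proto.intercepts_]
proto.out_activation_ = 'softmax'
print(proto.predict([np.random.randn(3)]))  # outputs now sum to 1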
Example #15
def test_partial_fit_regression():
    # Test partial_fit on regression.
    # `partial_fit` should yield the same results as 'fit' for regression.
    X = Xboston
    y = yboston

    for momentum in [0, .9]:
        mlp = MLPRegressor(solver='sgd', max_iter=100, activation='relu',
                           random_state=1, learning_rate_init=0.01,
                           batch_size=X.shape[0], momentum=momentum)
        with warnings.catch_warnings(record=True):
            # catch convergence warning
            mlp.fit(X, y)
        pred1 = mlp.predict(X)
        mlp = MLPRegressor(solver='sgd', activation='relu',
                           learning_rate_init=0.01, random_state=1,
                           batch_size=X.shape[0], momentum=momentum)
        for i in range(100):
            mlp.partial_fit(X, y)

        pred2 = mlp.predict(X)
        assert_almost_equal(pred1, pred2, decimal=2)
        score = mlp.score(X, y)
        assert_greater(score, 0.75)
Example #16
cur_nester = False
if parameters[7] == 1:
    cur_nester = True

cur_momentum = parameters[6]
reg = MLPRegressor(hidden_layer_sizes=hidden_layers,
                   activation="relu",
                   solver=cur_solver,
                   alpha=parameters[2],
                   batch_size='auto',
                   learning_rate=cur_learning_rate,
                   learning_rate_init=parameters[4],
                   power_t=parameters[5],
                   max_iter=200,
                   shuffle=True,
                   random_state=None,
                   tol=parameters[8],
                   verbose=False,
                   warm_start=False,
                   momentum=cur_momentum,
                   nesterovs_momentum=cur_nester,
                   early_stopping=False,
                   validation_fraction=0.1,
                   beta_1=0.9,
                   beta_2=0.999,
                   epsilon=1e-08,
                   n_iter_no_change=10)

#score = cross_val_score(reg, partx, party, cv=3,n_jobs= multiprocessing.cpu_count())
#print(np.mean(score))

reg.fit(partx, party)
Example #17
def main():
    cal_housing = fetch_california_housing()

    X, y = cal_housing.data, cal_housing.target
    names = cal_housing.feature_names

    # Center target to avoid gradient boosting init bias: gradient boosting
    # with the 'recursion' method does not account for the initial estimator
    # (here the average target, by default)
    y -= y.mean()

    print("Training MLPRegressor...")
    est = MLPRegressor(activation='logistic')
    est.fit(X, y)
    print('Computing partial dependence plots...')
    # We don't compute the 2-way PDP (5, 1) here, because it is a lot slower
    # with the brute method.
    features = [0, 5, 1, 2]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with MLPRegressor')
    plt.subplots_adjust(top=0.9)  # tight_layout causes overlap with suptitle

    print("Training GradientBoostingRegressor...")
    est = GradientBoostingRegressor(n_estimators=100, max_depth=4,
                                    learning_rate=0.1, loss='huber',
                                    random_state=1)
    est.fit(X, y)
    print('Computing partial dependence plots...')
    features = [0, 5, 1, 2, (5, 1)]
    plot_partial_dependence(est, X, features, feature_names=names,
                            n_jobs=3, grid_resolution=50)
    fig = plt.gcf()
    fig.suptitle('Partial dependence of house value on non-location features\n'
                 'for the California housing dataset, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    print('Custom 3d plot via ``partial_dependence``')
    fig = plt.figure()

    target_feature = (1, 5)
    pdp, axes = partial_dependence(est, X, target_feature,
                                   grid_resolution=50)
    XX, YY = np.meshgrid(axes[0], axes[1])
    Z = pdp[0].T
    ax = Axes3D(fig)
    surf = ax.plot_surface(XX, YY, Z, rstride=1, cstride=1,
                           cmap=plt.cm.BuPu, edgecolor='k')
    ax.set_xlabel(names[target_feature[0]])
    ax.set_ylabel(names[target_feature[1]])
    ax.set_zlabel('Partial dependence')
    #  pretty init view
    ax.view_init(elev=22, azim=122)
    plt.colorbar(surf)
    plt.suptitle('Partial dependence of house value on median\n'
                 'age and average occupancy, with Gradient Boosting')
    plt.subplots_adjust(top=0.9)

    plt.show()
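
# The usual entry-point guard (assumed; not shown in the source):
if __name__ == '__main__':
    main()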
Example #18
def main(args=None):
    args = arg_parser().parse_args(args)
    if args.verbosity == 1:
        level = logging.getLevelName('INFO')
    elif args.verbosity >= 2:
        level = logging.getLevelName('DEBUG')
    else:
        level = logging.getLevelName('WARNING')
    logging.basicConfig(
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        level=level)
    logger = logging.getLogger(__name__)
    try:
        np.random.seed(args.random_seed)
        if args.regr_type == 'rf':
            from sklearn.ensemble import RandomForestRegressor
            regr = RandomForestRegressor(
                n_jobs=args.n_jobs,
                min_samples_leaf=args.min_samp_leaf,
                n_estimators=args.n_trees,
                max_features=args.max_features,
                max_depth=args.max_depth,
                random_state=args.random_seed,
                verbose=1 if args.verbosity >= 2 else 0)
            flatten = True
        elif args.regr_type == 'xg':
            try:
                from xgboost import XGBRegressor
            except ImportError:
                logger.warning('Need to install xgboost to use xg option')
                raise
            regr = XGBRegressor(
                n_jobs=args.n_jobs,
                n_estimators=args.n_trees,
                random_state=args.random_seed,
                max_depth=3 if args.max_depth is None else args.max_depth,
                silent=False if args.verbosity >= 2 else True)
            flatten = True
        elif args.regr_type == 'pr':
            from sklearn.linear_model import LinearRegression
            regr = LinearRegression(
                n_jobs=args.n_jobs,
                fit_intercept=True if args.poly_deg is None else False)
            flatten = False
        elif args.regr_type == 'mlr':
            from synthit.synth.mlr import LinearRegressionMixture
            regr = LinearRegressionMixture(3,
                                           num_restarts=args.num_restarts,
                                           num_workers=args.n_jobs,
                                           max_iterations=args.max_iterations,
                                           threshold=args.threshold)
            args.poly_deg = 1 if args.poly_deg is None else args.poly_deg  # hack to get bias term included in features
            flatten = True
        elif args.regr_type == 'mlp':
            from sklearn.neural_network import MLPRegressor
            regr = MLPRegressor(hidden_layer_sizes=args.hidden_layer_sizes,
                                max_iter=args.max_iterations,
                                random_state=args.random_seed,
                                verbose=True if args.verbosity >= 2 else False)
            flatten = True
        else:
            raise SynthError(
                'Invalid regressor type: {}. {{rf, xg, pr, mlr, mlp}} are the only supported options.'
                .format(args.regr_type))
        logger.debug(regr)
        ps = PatchSynth(regr, args.patch_size, args.n_samples, args.ctx_radius,
                        args.threshold, args.poly_deg, args.mean,
                        args.full_patch, flatten, args.use_xyz)
        source = [ps.image_list(sd) for sd in args.source_dir]
        target = ps.image_list(args.target_dir)
        if any([len(source_) != len(target) for source_ in source]):
            raise SynthError(
                'Number of source and target images must be equal.')
        if args.mask_dir is not None:
            masks = ps.image_list(args.mask_dir)
            if len(masks) != len(target):
                raise SynthError(
                    'If masks are provided, the number of masks must be equal to the number of images.'
                )
            source = [[
                nib.Nifti1Image(src.get_data() * mask.get_data(), src.affine,
                                src.header)
                for (src, mask) in zip(source_, masks)
            ] for source_ in source]
            target = [
                nib.Nifti1Image(tgt.get_data() * mask.get_data(), tgt.affine,
                                tgt.header)
                for (tgt, mask) in zip(target, masks)
            ]
        else:
            masks = [None] * len(target)
        if not args.cross_validate:
            ps.fit(source, target, masks)
            outfile = 'trained_model.pkl' if args.output is None else args.output
            logger.info('Saving trained model: {}'.format(outfile))
            joblib.dump(ps, outfile)
        else:
            for i in range(len(target)):
                src = [[src_ for k, src_ in enumerate(source_) if i != k]
                       for source_ in source]
                tgt = [tgt_ for k, tgt_ in enumerate(target) if i != k]
                msk = [msk_ for k, msk_ in enumerate(masks) if i != k]
                ps.fit(src, tgt, msk)
                if args.output is not None:
                    name, ext = os.path.splitext(args.output)
                    outfile = name + '_{}'.format(i) + ext
                else:
                    outfile = 'trained_model_{}.pkl'.format(i)
                logger.info('Saving trained model: {}'.format(outfile))
                joblib.dump(ps, outfile)
        return 0
    except Exception as e:
        logger.exception(e)
        return 1
Example #19
class NeuralNetwork:
    ################# Fields #######################
    # dataset_filename: string - path to dataset
    # header: list - header of the dataset
    # enumerable_columns: list - the enumerable columns

    # df: matrix - data set
    # training_set: matrix - training set
    # test_set: matrix - test set

    # TSnew_X: matrix - training set of TSnew (see documentation)
    # TSnew_Y: matrix - training set of TSnew (see documentation)
    # dim_random_subset: int - number of features to set to 0 (see documentation)
    # repeatSometimes: int - number of for-loop cycles (see documentation)

    def __init__(self, repeatSometimes = 2, dim_random_subset = 2):
        # variables initialization
        self.enumerable_columns = []
        self.dataset_filename = ""
        self.header = []
        self.df = pandas.DataFrame()
        self.trainSet = pandas.DataFrame()
        self.testSet = pandas.DataFrame()
        self.TSnew_X = pandas.DataFrame()
        self.TSnew_Y = pandas.DataFrame()

        self.repeatSometimes = repeatSometimes
        self.dim_random_subset = dim_random_subset

        # This code takes a long time to run, so I cache some of the computations
        if not os.path.isfile('trainSet{}-{}.csv'.format(repeatSometimes, dim_random_subset)):
            self.readDataset()
            self.discretization()
            self.preprocess()

            # creating TSnew
            self.createTrainingAndTestSet()
            self.createTSnew()

            # backup encoded sets
            self.writeCSV()
        else:
            self.readCSV()

        # training and test
        self.train()
        self.predict()


    def readDataset(self):
        print("DEB Read dataset")

        with open('header.txt') as f:
            self.header = f.read().split(',')
            print(self.header)
        with open('dataset.txt') as f:
            self.dataset_filename = f.read()
            print(self.dataset_filename)
        self.df = pandas.read_csv(self.dataset_filename, names=self.header)
        print('Dataset with {} entries'.format(len(self.df)))

############# Preprocessing ##########################
    # helper function (should not be called from other functions)
    def discretize(self, column):
        print("DEB Discretize column " + column)
        sorted_col = sorted(column)
        l = len(column)
        n = int(numpy.floor(l / 2))
        if l % 2 == 0:
            median_1 = numpy.median(sorted_col[0:n])
            median_2 = numpy.median(sorted_col[n:])
        else:
            median_1 = numpy.median(sorted_col[0:(n + 1)])
            median_2 = numpy.median(sorted_col[(n + 1):])
        iqr = median_2 - median_1
        h = 2 * iqr * (1 / numpy.cbrt(l))
        if h > 0:
            bins_number = int(numpy.ceil((column.max() - column.min()) / h))
            new_col, bins = pandas.cut(column, bins_number, labels=False, retbins=True, include_lowest=False)
        else:
            new_col = column
            bins = []
        return new_col, bins

    # helper function (should not be called from other functions)
    def normalize(self, column):
        print("DEB Normalize")
        h = abs(column.min())
        new_col = column + h
        return new_col

    def discretization(self):
        print("DEB Discretization")
        replacements = {}
        bins = {}
        for i in range(0, self.df.shape[1]):  # for each feature
            bins[i] = []
            col = self.df.values[:, i]
            flag_str = False
            flag_float = False
            flag_negative = False

            for j in col:
                if type(j) is str: flag_str = True
                elif type(j) is float: flag_float = True
                elif type(j) is int and j < 0: flag_negative = True

            if flag_str:
                continue
            elif flag_negative:
                new_col = self.normalize(col)
                replacements[i] = new_col
                bins[i] = []
            elif flag_float:
                new_col, new_bins = self.discretize(col)
                replacements[i] = new_col
                bins[i] = new_bins
            for k, v in replacements.items():
                self.df.iloc[:, k] = v

    def preprocess(self, removeColumnsWithMissingValues = False):
        print("DEB Preprocessing")
        m = self.df.values

        # it is possible to encode enumerable features and to remove missing values
        with open('enumerable_columns.txt') as f:  # e.g., self.enumerable_columns = [0, 5, 8]
            self.enumerable_columns = f.read()
            if ',' in self.enumerable_columns:
                self.enumerable_columns = list(map(int, self.enumerable_columns.split(',')))
            else:
                self.enumerable_columns = [int(self.enumerable_columns)]
            print("enumerable columns are: " + str(self.enumerable_columns))
        le = preprocessing.LabelEncoder()
        for col in self.enumerable_columns:
            # if the column is enumerable
            self.df[self.header[col]] = le.fit_transform(self.df[self.header[col]])  #  A -> 0, B -> 1, ...

        #  remove cols with missing values (NaN), even though you risk reducing the dataset too much
        if removeColumnsWithMissingValues:
            self.df = self.df.dropna(axis=1)  # drop every column that contains a missing value


############## MLP architecture #######################
    def createTrainingAndTestSet(self):
        print("DEB Create Training set. Using formula 80-20%")
        self.trainSet, self.testSet = train_test_split(self.df, test_size=0.20)

    # heart of the algorithm!
    def createTSnew(self):
        print("DEB Create TS new")
        for i in range(0, self.trainSet.shape[0]):
            for j in range(0, self.repeatSometimes):
                # choose small random subset of features X_hat
                X_hat = [int(self.trainSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.trainSet.values[i, :])
                for feature in X_hat:  # set the randomly chosen features to 0; X_hat holds their indices
                    row[feature] = 0
                self.TSnew_X = pandas.concat([self.TSnew_X, pandas.DataFrame(row.reshape(-1, len(row)))])  # append row to TSnew_X
                copy = numpy.copy(self.trainSet.values[i, :])
                self.TSnew_Y = pandas.concat([self.TSnew_Y, pandas.DataFrame(copy.reshape(-1, len(copy)))])  # Y = x1...xk

############## Train & Predict ########################
    def train(self):
        print("DEB Training with TSnew")
        self.MLP = MLPRegressor(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
                                 beta_2=0.999, early_stopping=False, epsilon=1e-08,
                                 hidden_layer_sizes=len(self.TSnew_Y.columns), learning_rate='constant',
                                 learning_rate_init=0.001, max_iter=200, momentum=0.9,
                                 nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
                                 solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
                                 warm_start=False)
        self.MLP.fit(self.TSnew_X, self.TSnew_Y)

    def predict(self):
        print("DEB Test")

        testSetNew_X = pandas.DataFrame()
        testSetNew_Y = pandas.DataFrame()

        # preparing the test set - here you do the same as in function createTSnew:
        if not os.path.isfile('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset)):
            for i in range(0, self.testSet.shape[0]):
                # choose small random subset of features X_hat
                X_hat = [int(self.testSet.shape[1] * random.random()) for i in range(0, self.dim_random_subset)]
                # insert into TSnew the sample: (x1...X_hat = 0 ... xk ; x1...xk)
                row = numpy.copy(self.testSet.values[i, :])
                for feature in X_hat:  # set the randomly chosen features to 0; X_hat holds their indices
                    row[feature] = 0
                testSetNew_X = pandas.concat([testSetNew_X, pandas.DataFrame(row.reshape(-1, len(row)))])
                copy = numpy.copy(self.testSet.values[i, :])
                testSetNew_Y = pandas.concat([testSetNew_Y, pandas.DataFrame(copy.reshape(-1, len(copy)))])  # Y = x1...xk
            testSetNew_X.to_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
            testSetNew_Y.to_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        else:  # if the needed DataFrames have already been calculated, simply load them from disk
            testSetNew_X = pandas.read_csv('testSetNew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
            testSetNew_Y = pandas.read_csv('testSetNew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)

        # predictions
        self.MLP.predict(testSetNew_X)
        print("Score of method (repetitions={}, subset={}): {}%".format(self.repeatSometimes, self.dim_random_subset, self.MLP.score(testSetNew_X, testSetNew_Y) * 100))

########################## Helper functions ####################
    def writeCSV(self):
        print("DEB WriteCSV")
        self.trainSet.to_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.testSet.to_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_X.to_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))
        self.TSnew_Y.to_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset))

    def readCSV(self):
        print("DEB ReadCSV")
        self.trainSet = pandas.read_csv('trainSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.testSet = pandas.read_csv('testSet{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_X = pandas.read_csv('TSnew_X{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
        self.TSnew_Y = pandas.read_csv('TSnew_Y{}-{}.csv'.format(self.repeatSometimes, self.dim_random_subset), index_col=0)
Example #20
bestNeurons = 0
bestEta = 0
bestScore = float('-inf')
score = 0
for neurons in range(20, 200, 1):
    for eta in range(1, 11, 1):
        eta = eta / 10.0
        kf = KFold(n_splits=10)
        cvscore = []
        for train, validation in kf.split(X):
            X_train, X_validation, y_train, y_validation = X[train, :], X[
                validation, :], y[train], y[validation]
            # here we create the MLP regressor
            mlp = MLPRegressor(hidden_layer_sizes=(neurons, ),
                               verbose=False,
                               learning_rate_init=eta)
            # here we train the MLP
            mlp.fit(X_train, y_train)
            # now we get E_out for validation set
            score = mlp.score(X_validation, y_validation)
            cvscore.append(score)

        # average CV score
        score = sum(cvscore) / len(cvscore)
        if (score > bestScore):
            bestScore = score
            bestNeurons = neurons
            bestEta = eta
            print("Neurons " + str(neurons) + ", eta " + str(eta) +
                  ". Testing set CV score: %f" % score)
Example #21
from sklearn.model_selection import KFold

from sklearn.neural_network import MLPRegressor

reg = MLPRegressor(activation='logistic',
                   alpha=0.0001,
                   batch_size='auto',
                   beta_1=0.9,
                   beta_2=0.999,
                   early_stopping=False,
                   epsilon=1e-08,
                   hidden_layer_sizes=100,
                   learning_rate='constant',
                   learning_rate_init=0.001,
                   max_iter=200,
                   momentum=0.9,
                   n_iter_no_change=10,
                   nesterovs_momentum=True,
                   power_t=0.5,
                   random_state=None,
                   shuffle=True,
                   solver='lbfgs',
                   tol=0.0001,
                   validation_fraction=0.1,
                   verbose=False,
                   warm_start=False)
kf = KFold(n_splits=10)
print("Using ", kf.get_n_splits(X), " folds")

from sklearn.metrics import r2_score
avg_r2_train = []
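
# The snippet stops here; a sketch of the cross-validation loop these variables
# set up, assuming X and y are defined:
avg_r2_test = []
for train_idx, test_idx in kf.split(X):
    reg.fit(X[train_idx], y[train_idx])
    avg_r2_train.append(r2_score(y[train_idx], reg.predict(X[train_idx])))
    avg_r2_test.append(r2_score(y[test_idx], reg.predict(X[test_idx])))
print(sum(avg_r2_train) / len(avg_r2_train),
      sum(avg_r2_test) / len(avg_r2_test))

# The manual neurons/eta search in the previous example can likewise be written
# with GridSearchCV, which runs the KFold loop internally (a sketch, same X and
# y assumptions; the grid mirrors the loops and is just as expensive to run):
from sklearn.model_selection import GridSearchCV
param_grid = {
    'hidden_layer_sizes': [(n,) for n in range(20, 200)],
    'learning_rate_init': [e / 10.0 for e in range(1, 11)],
}
search = GridSearchCV(MLPRegressor(), param_grid, cv=10)
search.fit(X, y)
print(search.best_params_, search.best_score_)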
Example #22
plt.plot(X, y_poly, c='b', label='Polynomial model')
plt.legend()
plt.show()

### MLP

from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt
x = np.arange(0.0, 1, 0.01).reshape(-1, 1)
y = np.sin(2 * np.pi * x)
mlp_reg = MLPRegressor(
    hidden_layer_sizes=(10, 3),
    activation='relu',
    solver='adam',
    learning_rate='constant',
    learning_rate_init=0.01,
    max_iter=1000,
    tol=0.0001,
)
mlp_reg.fit(x, y)
test_x = np.arange(0.0, 1, 0.05).reshape(-1, 1)
test_y = mlp_reg.predict(test_x)
plt.scatter(x, y, c='b', marker="s", label='real')
plt.scatter(test_x, test_y, c='r', marker="o", label='NN Prediction')
plt.legend()
plt.show()

### KNN

from sklearn import neighbors
model = neighbors.KNeighborsRegressor()  # constructor line is missing in the source; assumed reconstruction
model.fit(X_train_mode,y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.svm import SVR
model = SVR()
model.fit(X_train_0,y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))

model.fit(X_train_mode,y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.neural_network import MLPRegressor
model = MLPRegressor()
model.fit(X_train_0,y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))

model.fit(X_train_mode,y_train)
y_pred = model.predict(X_test_mode)
print(mean_squared_error(y_test, y_pred))

from sklearn.svm import LinearSVR
model = LinearSVR()
model.fit(X_train_0,y_train)
y_pred = model.predict(X_test_0)
print(mean_squared_error(y_test, y_pred))

model.fit(X_train_mode,y_train)
df = pd.read_csv(FILE)
mmscaler = MinMaxScaler()

dates = df['Index'].tolist()
dates = np.reshape(dates, (len(dates), 1))
dates = mmscaler.fit_transform(dates)

closes = df['Adj Close'].tolist()
closes = np.reshape(closes, (len(closes), 1))
closes = mmscaler.fit_transform(closes)
closes = closes.ravel()

SIZE = len(dates)

svr = SVR(kernel = 'rbf', C = C_VAL, gamma = Y_VAL)
mlp = MLPRegressor(hidden_layer_sizes = (100))
reg = LinearRegression()

for i in range(int((1-TEST)*WINDOW), SIZE, int(WINDOW)):

	dates_train, dates_test = dates[i-int((1-TEST)*WINDOW):i], dates[i:i+int(TEST*WINDOW)]
	closes_train, closes_test = closes[i-int((1-TEST)*WINDOW):i], closes[i:i+int(TEST*WINDOW)]	

	svr.fit(dates_train, closes_train)
	mlp.fit(dates_train, closes_train)
	reg.fit(dates_train, closes_train)

	trained_closes_svr = svr.predict(dates_train)
	tested_closes_svr = svr.predict(dates_test)

	trained_closes_mlp = mlp.predict(dates_train)
Example #25
    'min_data': 1,
    'verbose': 0
}
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=20,
                valid_sets=lgb_eval,
                early_stopping_rounds=5)
y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
error3 = mean_squared_error(y_pred, y_test)

scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
mlp = MLPRegressor(hidden_layer_sizes=(13, 13, 13), max_iter=10000)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
error4 = mean_squared_error(y_pred, y_test)
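
# Sketch (not from the original source): the scaling + MLP steps above are
# often wrapped in a Pipeline so the scaler is fit only on training data;
# this assumes the raw, unscaled X_train.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor

pipe = make_pipeline(StandardScaler(),
                     MLPRegressor(hidden_layer_sizes=(13, 13, 13), max_iter=10000))
pipe.fit(X_train, y_train)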

model = svm.SVR(C=20000,
                cache_size=200,
                coef0=0.0,
                degree=3,
                epsilon=0.1,
                gamma=0.0008,
                kernel='rbf',
                max_iter=-1,
                shrinking=True,
                tol=0.001,
                verbose=False)
Example #26
def main():
    # dimensions to test
    DIMENSIONS = [64, 32, 16, 8, 4, 2, 1]

    X, y = data_processing.read_data('Data/conmat_240.mat', 'Data/age_240.mat')

    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.8)

    # train embeddings for each dimension
    encoders = list()
    for dimension in DIMENSIONS:

        print(str(dimension) + "-D Embedding Training")

        e_x = tf.keras.layers.Input((None, 268))
        e_o = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(dimension, activation='tanh'))(e_x)
        e = tf.keras.Model(e_x, e_o)

        d_x = tf.keras.layers.Input((None, dimension))
        d_o = tf.keras.layers.TimeDistributed(
            tf.keras.layers.Dense(268, activation='linear'))(d_x)
        d = tf.keras.Model(d_x, d_o)

        model = AutoEncoder(e, d)
        model.train(X_train, epochs=100, learning_rate=0.001, loss='mse')

        encoders.append((model, dimension))

    # encode train and test data using embeddings, then flatten for prediction
    embedded_train_list = list()
    embedded_test_list = list()
    for model, dim in encoders:
        embedded_train_matrix = np.zeros((len(X_train), 268 * dim))
        for i in range(len(X_train)):
            embedding_train = model.encode(X_train[i])
            embedded_train_matrix[i] = np.ndarray.flatten(embedding_train)
        embedded_train_list.append(embedded_train_matrix)
        embedded_test_matrix = np.zeros((len(X_test), 268 * dim))
        for i in range(len(X_test)):
            embedding_test = model.encode(X_test[i])
            embedded_test_matrix[i] = np.ndarray.flatten(embedding_test)
        embedded_test_list.append(embedded_test_matrix)

    # train prediction models on encoded train data, then test on encoded test data and calculate Mean Squared Error
    lr_error_list = list()
    svr_error_list = list()
    mlp_error_list = list()
    lr_error_list_train = list()
    svr_error_list_train = list()
    mlp_error_list_train = list()
    for i in range(len(embedded_train_list)):
        #savemat(f'Data/neural_{DIMENSIONS[i]}.mat', {'train':embedded_train_list[i] ,'test':embedded_test_list[i]})
        lr = Ridge(alpha=2).fit(embedded_train_list[i], y_train)
        svr = SVR().fit(embedded_train_list[i], np.reshape(y_train, -1))
        mlp = MLPRegressor(hidden_layer_sizes=(64, 32, 16, 8),
                           learning_rate_init=0.001,
                           max_iter=1000).fit(embedded_train_list[i],
                                              np.reshape(y_train, -1))
        predictedLR = lr.predict(embedded_train_list[i])
        predictedSV = svr.predict(embedded_train_list[i])
        predictedMLP = mlp.predict(embedded_train_list[i])
        lr_error = mean_squared_error(predictedLR, y_train)
        svr_error = mean_squared_error(predictedSV, y_train)
        mlp_error = mean_squared_error(predictedMLP, y_train)
        lr_error_list_train.append(lr_error)
        svr_error_list_train.append(svr_error)
        mlp_error_list_train.append(mlp_error)
        predictedLR = lr.predict(embedded_test_list[i])
        predictedSV = svr.predict(embedded_test_list[i])
        predictedMLP = mlp.predict(embedded_test_list[i])
        print(str(embedded_test_list[i].shape[-1] // 268) + "-D Predicted")
        lr_error = mean_squared_error(predictedLR, y_test)
        svr_error = mean_squared_error(predictedSV, y_test)
        mlp_error = mean_squared_error(predictedMLP, y_test)
        lr_error_list.append(lr_error)
        svr_error_list.append(svr_error)
        mlp_error_list.append(mlp_error)

    # plot MSE for different embedding dims and prediction methods
    width = 0.35
    plt.bar(np.arange(len(lr_error_list_train)),
            lr_error_list_train,
            width,
            label="LinReg")
    plt.bar(np.arange(len(svr_error_list_train)) + width,
            svr_error_list_train,
            width,
            label="SVR")
    plt.bar(np.arange(len(mlp_error_list_train)) + 2 * width,
            mlp_error_list_train,
            width,
            label="MLP")
    plt.ylabel("MSE")
    plt.xlabel("Dimensions")
    plt.title("Autoencoder Mean Squared Error by Embedding Dimension - Train")
    plt.xticks(np.arange(len(svr_error_list)) + width, list(DIMENSIONS))
    plt.legend(loc="best")
    plt.savefig('images/autoencoder_train')
    plt.show()

    width = 0.35
    plt.bar(np.arange(len(lr_error_list)),
            lr_error_list,
            width,
            label="LinReg")
    plt.bar(np.arange(len(svr_error_list)) + width,
            svr_error_list,
            width,
            label="SVR")
    plt.bar(np.arange(len(mlp_error_list)) + 2 * width,
            mlp_error_list,
            width,
            label="MLP")
    plt.ylabel("MSE")
    plt.xlabel("Dimensions")
    plt.title("Autoencoder Mean Squared Error by Embedding Dimension - test")
    plt.xticks(np.arange(len(svr_error_list)) + width, list(DIMENSIONS))
    plt.legend(loc="best")
    plt.savefig('images/autoencoder_test')
    plt.show()
Example #27
rna_clf = MLPClassifier(solver='adam',
                        alpha=0.0001,
                        hidden_layer_sizes=(100, 4),
                        random_state=1)

score = cross_val_score(svm,
                        X=features_normalized,
                        y=target_discretized2,
                        cv=kfold)
score.mean()

# Regression
linearR = LinearRegression()
svr = SVR()
rna_reg = MLPRegressor(solver='adam',
                       alpha=0.0001,
                       hidden_layer_sizes=(100, 4),
                       random_state=1)

all_features = [('f_norm', features_normalized),
                ('f_stand', features_standard)]
all_targets_discretized = [('t_disc', target_discretized),
                           ('t_stand_disc', target_standard_discretized),
                           ('t_stand_norm', target_normalized_discretized),
                           ('t_qcut', target_discretized2)]
all_models_classification = [('SVM', svm), ('GNB', gaussianNB), ('LR', lr),
                             ('KNN', knn), ('RNA_CLF', rna_clf)]

all_models_regression = [('linearR', linearR), ('SVR', svr),
                         ('RNA_REG', rna_reg)]
all_targets = [('t_norm', target_normalized), ('t_stand', target_standard)]
Example #28
class NN(object):
    """docstring for NN"""
    def __init__(self,
                 space_dim,
                 done_fktn,
                 predict_change=False,
                 sample_rejection=False):
        self.input_dim = space_dim + 1
        self.output_dim = self.input_dim - 1
        self.X = None
        self.Y = None
        self.done = done_fktn
        self.type = 'NN'
        self.predict_change = predict_change
        self.sample_rejection = sample_rejection
        self.nb_samples = 6000
        self.kde = KernelDensity(bandwidth=10 /
                                 (space_dim * np.power(1000, 1 / space_dim)))

    def add_trajectory(self, observations, actions, rewards):
        if self.X is None:
            self.X = np.hstack((observations[:-1], actions))
            if self.predict_change:
                self.Y = np.hstack(
                    (observations[1:] - observations[:-1], rewards))
            else:
                self.Y = np.hstack((observations[1:], rewards))
        else:
            new_X = np.hstack((observations[:-1], actions))
            if self.sample_rejection:
                index = self.reject_index(new_X)
                print(index.sum(), 'samples added')
            else:
                index = np.ones(len(new_X), dtype=bool)
            self.X = np.vstack((self.X, new_X[index]))
            if self.predict_change:
                self.Y = np.vstack(
                    (self.Y,
                     np.hstack((np.asarray(observations[1:][index]) -
                                observations[:-1][index], rewards[index]))))
            else:
                self.Y = np.vstack((self.Y,
                                    np.hstack(
                                        (np.asarray(observations[1:][index]),
                                         rewards[index]))))

    def train(self):
        train_size = np.min((self.nb_samples, self.X.shape[0]))
        train_index = np.arange(self.X.shape[0], dtype=int)
        np.random.shuffle(train_index)
        train_index = train_index[:train_size]
        self.scaler = StandardScaler()
        self.scaler.fit(self.X[train_index])
        X_train = self.scaler.transform(self.X)
        self.model = MLPRegressor(hidden_layer_sizes=(200),
                                  activation='logistic')
        self.model.fit(X_train[train_index], self.Y[train_index])

    def predict(self, observation, action):
        obs = self.scaler.transform(
            np.asarray([*observation, action]).reshape(1, -1))
        y_pred = self.model.predict(obs).flatten()
        if self.predict_change:
            state_pred = observation.flatten() + y_pred[:-1]
        else:
            state_pred = y_pred[:-1]
        reward_pred = y_pred[-1]
        return state_pred, reward_pred, self.done(state_pred)

    def reject_index(self, data):
        self.kde.fit(self.X)
        mins = np.min(self.X, axis=0)
        maxs = np.max(self.X, axis=0)
        means = (mins + maxs) / 2
        scales = np.abs(maxs - mins)
        test = np.random.rand(1000, len(scales)) - 0.5
        test *= scales
        test += means
        scores = self.kde.score_samples(test)
        max_populated, min_populated = np.max(scores), np.min(scores)
        mean_populated = (max_populated + min_populated) / 2
        scores = self.kde.score_samples(data)
        cut = 1.1 * max_populated - (1 / (1 + np.exp(
            self.nb_samples / len(self.X) - len(self.X) / self.nb_samples))
                                     ) * np.abs(max_populated - min_populated)
        return scores < cut
Example #29
def training_by_different_model(dfnorm, y, name):
    svr = svm.SVR(kernel='linear')
    lr = LinearRegression()
    dt = DecisionTreeRegressor()
    rf = RandomForestRegressor()
    #kernel = C(1.0, (1e-3, 1e3)) * RBF(10, (1e-2, 1e2)) + WhiteKernel(noise_level=0.5)
    kernel =  50.0**2 * RBF(length_scale=50.0) + 0.5**2 * RationalQuadratic(length_scale=1.0) + WhiteKernel(noise_level=0.1)
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=10,normalize_y=True)

    nn = MLPRegressor(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=hidden_layer_sizes, random_state=1, max_iter=1000, activation='relu', learning_rate_init=0.01, momentum=0.9)

    cv = 3
    n_jobs=3
    predicted_lr = cross_val_predict(lr, dfnorm, y, cv=cv,n_jobs=n_jobs )
    predicted_svr = cross_val_predict(svr, dfnorm,y,cv=cv,n_jobs=n_jobs )
    predicted_dt = cross_val_predict(dt, dfnorm, y, cv=cv,n_jobs=n_jobs )
    predicted_rf = cross_val_predict(rf, dfnorm, y, cv=cv,n_jobs=n_jobs )
    predicted_gp = cross_val_predict(gp, dfnorm, y, cv=cv,n_jobs=n_jobs )
    predicted_nn = cross_val_predict(nn, dfnorm, y, cv=cv,n_jobs=n_jobs )
    #predicted_nn = 100
    # do not run until the previous step finishes execution. gaussian process and neural network cross validation takes some time.

    result = cross_validate(gp, dfnorm, y,n_jobs=n_jobs, cv=cv, return_estimator=True)
    for i, score in enumerate(result["test_score"]):
        if score == max(result["test_score"]):
            gp = result["estimator"][i]
        
    print(gp)

    joblib.dump(gp, 'models/gp_{}.model'.format(name)) 

    print("\tLR\tSVR\tDT\tRF\tNN\tGP")
    cv_lr = mean_absolute_error(y, predicted_lr)
    cv_svr = mean_absolute_error(y, predicted_svr)
    cv_dt = mean_absolute_error(y, predicted_dt)
    cv_rf = mean_absolute_error(y, predicted_rf)
    cv_nn = mean_absolute_error(y, predicted_nn)
    cv_gp = mean_absolute_error(y, predicted_gp)
    
    print("mae\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))
    cv_lr = mean_absolute_percentage_error(y, predicted_lr)
    cv_svr = mean_absolute_percentage_error(y, predicted_svr)
    cv_dt = mean_absolute_percentage_error(y, predicted_dt)
    cv_rf = mean_absolute_percentage_error(y, predicted_rf)
    cv_nn = mean_absolute_percentage_error(y, predicted_nn)
    cv_gp = mean_absolute_percentage_error(y, predicted_gp)
    ##cv_gp = 0
    print("mape\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))
    cv_lr = sqrt(mean_squared_error(y, predicted_lr))
    cv_svr = sqrt(mean_squared_error(y, predicted_svr))
    cv_dt = sqrt(mean_squared_error(y, predicted_dt))
    cv_rf = sqrt(mean_squared_error(y, predicted_rf))
    cv_nn = sqrt(mean_squared_error(y, predicted_nn))
    cv_gp = sqrt(mean_squared_error(y, predicted_gp))
    #cv_gp = 0

    print("rmse\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (cv_lr, cv_svr, cv_dt, cv_rf, cv_nn, cv_gp))


    print("r2\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (r2_score(y, predicted_lr), r2_score(y, predicted_svr), r2_score(y, predicted_dt), r2_score(y, predicted_rf), r2_score(y, predicted_nn), r2_score(y, predicted_gp)))
    #print("r2\t%.2f\t%.2f\t%.2f\t%.2f\t%.2f" % (adj_r2_score(p, y, predicted_lr), r2_score(y, predicted_svr), r2_score(y, predicted_dt), r2_score(y, predicted_rf), r2_score(y, predicted_nn)))
    return

    # NOTE: the plotting code below is unreachable because of the return above
    fig, ax = plt.subplots()
    #plt.title('Cross-validated predictions of 95th percentile latency (ms)')
    ax.scatter(y, predicted_gp, edgecolors=(0, 0, 0))
    ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax.set_xlabel('Measured tail latency (ms)')
    ax.set_ylabel('Predicted tail latency (ms)')
    #ax.set_xlim(50,500)
    #ax.set_ylim(50,500)
    plt.grid(True)
    #plt.xticks(np.arange(0, 501, step=100))
    #plt.yticks(np.arange(0, 501, step=100))
    plt.tight_layout()
    plt.show()
    return 
    fig, ax = plt.subplots()
    #plt.title('Cross-validated predictions of 95th percentile latency (ms)')
    ax.scatter(y, predicted_nn, edgecolors=(0, 0, 0))
    ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax.set_xlabel('Measured tail latency (ms)')
    ax.set_ylabel('Predicted tail latency (ms)')
    #ax.set_xlim(50,500)
    #ax.set_ylim(50,500)
    plt.grid(True)
    #plt.xticks(np.arange(0, 501, step=100))
    #plt.yticks(np.arange(0, 501, step=100))
    plt.tight_layout()
    plt.show()
Example #30
print(X.shape)

_t1b = tm.time()

# Transform data
X = StandardScaler().fit_transform(X)

# y_scaler = StandardScaler().fit(y)
# y = y_scaler.transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Prepare linear model
clf = MLPRegressor(
    hidden_layer_sizes=(128, 128, 64), activation="tanh", solver="adam", verbose=True, tol=1.0e-10, early_stopping=True
)

"""
font = {'family' : 'Bitstream Vera Sans',
        'weight' : 'normal',
        'size'   : 9}

plt.rc('font', **font)

fig, axes = plt.subplots(nrows=1, ncols=1)
axes.set_title("Data: " + file)
axes.set_ylabel('Normalized distant count')
axes.set_xlabel('Distance ($\AA$)')

axes.hist(y_train, 150, color='blue',normed=True, label='plot',linewidth=2,alpha=1.0)
Example #31
def inBuilt(inp_dat, out_dat, inp_start, inp_end, inc, out_max):

    inp = []
    out = []
    for n in inp_dat:
        inp.append([int(n[0] * 10)])
    for n in out_dat:
        out.append(int(n * 100))

    classifiers = [
        ("LINEAR: ", linear_model.LinearRegression()),
        ('LOG-LBFGS: ', LogisticRegression(solver='lbfgs', max_iter=2000)),
        ('LOG-NEWTON: ', LogisticRegression(solver='newton-cg',
                                            max_iter=2000)),
        ('MLPCLAS-ADAM: ', MLPClassifier(solver='adam', max_iter=5000)),
        ('SGDREG: ', MLPRegressor(solver='lbfgs', max_iter=2000)),
    ]
    clas = [
        ('SVC', LinearSVC(max_iter=2000)),
    ]

    for name, clf in classifiers:
        print(' ')
        clf.fit(inp, out)
        print(name, ': ')
        inp2 = []
        out2 = []

        x1 = np.arange(inp_start * 10, inp_end * 10, inc * 10)
        x = x1.reshape(-1, 1).tolist()

        for i in x:
            inp2.append(i[0] / 10)
            y = clf.predict([[i[0]]]) / 100 + 4  # predict expects a 2D array
            # for j in range(len(y)):
            #     y[j] *= out_max
            out2.append(list(y))

            #   print(time.time() - start)

        plt.plot(np.asarray(inp2), np.asarray(out2), 'b--', np.asarray(inp2),
                 cpw_plot.f2(np.asarray(inp2), 6), 'go')
        plt.ylabel('Impedance')
        plt.xlabel('a/b ')
        plt.title('cpw using ' + name + '   b/h=0.1')
        plt.show()

        errorplot = abs(np.asarray(out2).T -
                        cpw_plot.f2(np.asarray(inp2), 6)) / cpw_plot.f2(
                            np.asarray(inp2), 6) * 100

        max_error = max(errorplot[0])
        print("max error: ", max_error)

        avg_error = 0
        for i in errorplot[0]:
            avg_error += i
        avg_error = avg_error / len(errorplot[0])

        print("average error: ", avg_error)

        plt.plot(np.asarray(inp2), errorplot[0], 'r--')
        plt.ylabel('absolute error')

        plt.xlabel('a/b')

        plt.title('%error using ' + name)
        plt.show()

        x = [
            0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75,
            0.8, 0.85, 0.9, 0.95
        ]
        for i in x:
            ou = clf.predict([[i * 10]])  # predict expects a 2D array
            print(ou[0] / 100)
Example #32
from sklearn.model_selection import KFold, cross_validate
from sklearn.svm import SVC, SVR
from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor

if __name__ == '__main__':  # run only when executed directly, not when imported
    # List of the benchmark algorithms and the models implementing them;
    # the arguments for each algorithm are specified here.
    models = [
        ('SVM', SVC(random_state=1), SVR()),
        ('GaussianProcess', GaussianProcessClassifier(random_state=1),
         GaussianProcessRegressor(normalize_y=True, alpha=1, random_state=1)),
        ('KNeighbors', KNeighborsClassifier(), KNeighborsRegressor()),
        ('MLP', MLPClassifier(random_state=1),
         MLPRegressor(hidden_layer_sizes=(5), solver='lbfgs', random_state=1)),
    ]

    # For each benchmark dataset file: its delimiter, the header row position, and the index column position
    classifier_files = ['iris.data', 'sonar.all-data', 'glass.data']
    classifier_params = [(',', None, None), (',', None, None), (',', None, 0)]
    regressor_files = [
        'airfoil_self_noise.dat', 'winequality-red.csv',
        'winequality-white.csv'
    ]
    regressor_params = [(r'\t', None, None), (';', 0, None), (';', 0, None)]

    # Table storing the evaluation score for each dataset file and algorithm
    result = pd.DataFrame(
        columns=['target', 'function'] + [m[0] for m in models],
        index=range(len(classifier_files + regressor_files) * 2))
Example #33
from __future__ import print_function, division
from future.utils import iteritems
from builtins import range, input
# Note: you may need to update your version of future
# sudo pip install -U future


import numpy as np
from sklearn.neural_network import MLPRegressor
from util import getKaggleMNIST



# get data
X, _, Xt, _ = getKaggleMNIST()

# create the model and train it
model = MLPRegressor()
model.fit(X, X)

# test the model
print("Train R^2:", model.score(X, X))
print("Test R^2:", model.score(Xt, Xt))

Xhat = model.predict(X)
mse = ((Xhat - X)**2).mean()
print("Train MSE:", mse)

Xhat = model.predict(Xt)
mse = ((Xhat - Xt)**2).mean()
print("Test MSE:", mse)
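
# Hedged sketch (not from the original source): with the default single hidden
# layer and 'relu' activation used above, the autoencoder's learned code can be
# read off the fitted weights directly.
hidden = np.maximum(0, np.dot(X, model.coefs_[0]) + model.intercepts_[0])
print("Hidden code shape:", hidden.shape)  # (n_samples, 100) with the defaults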
Example #34
y_train = y_train.reshape(-1)
y_test = y_test.reshape(-1)

# Feature scaling
#-----------------------------------------------------------------------------------------------------------------------
sc = preprocessing.StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)
#-----------------------------------------------------------------------------------------------------------------------

mlp_reg = MLPRegressor(
    hidden_layer_sizes=[12, 12, 12],
    max_iter=100,
    activation='relu',  # 'identity', 'logistic', 'tanh', 'relu'
    learning_rate_init=0.001,
    solver='lbfgs',  #lbfgs, sgd, adam
    random_state=6)

model_nn = mlp_reg.fit(X_train, y_train)

y_pred = model_nn.predict(X_test)

MSE = metrics.mean_squared_error(y_test, y_pred)
RMSE = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
MAE = metrics.mean_absolute_error(y_test, y_pred)
ACC = mlp_reg.score(X_test, y_test)  # R^2 score

# MAPE
#-----------------------------------------------------------------------------------------------------------------------
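# A minimal MAPE sketch (an assumption -- the original section is truncated
# here); the small epsilon guards against division by zero.
MAPE = np.mean(np.abs((y_test - y_pred) / (y_test + 1e-10))) * 100
print('MSE:', MSE, 'RMSE:', RMSE, 'MAE:', MAE, 'MAPE:', MAPE, 'R^2:', ACC)
#-----------------------------------------------------------------------------------------------------------------------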
Example #35
import os
import pickle

from sklearn.neural_network import MLPRegressor


class Ann:
    '''
        Implementation of, and interface to, the ANN functionality presented
    '''
    def __init__(self):

        self._nn = MLPRegressor(hidden_layer_sizes=(10,), verbose=False, warm_start=True)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []
        # TD-lambda parameter
        self.lambdaCoefficient = 0.9

    def evaluar(self, entrada):
        '''
            Returns the network's evaluation for the given input
        '''
        return self._nn.predict(entrada)

    def agregar_a_entrenamiento(self, tableros, resultado):
        '''
            Adds the game's data to the training examples
        '''

        # Present the game from the end backwards
        tableros.reverse()
        for i in range(len(tableros)):
            # Board representation, estimated value
            tablero, valorEstimado = tableros[i][0], tableros[i][1]
            self._entradas_entrenamiento.append(tablero)
            if i == 0:  # was 'i == 0 or True' in the original, which made the TD branch unreachable
                # For the final position, use the game result as the expected output
                self._salidas_esperadas_entrenamiento.append(resultado.value)
            else:
                # The value to learn, as given by TD-lambda
                valorAAprender = valorEstimado + self.lambdaCoefficient * (
                    self._salidas_esperadas_entrenamiento[i - 1] - valorEstimado)
                self._salidas_esperadas_entrenamiento.append(valorAAprender)

    def entrenar(self):
        '''
            Train the network on the stored examples
        '''
        self._nn.partial_fit(self._entradas_entrenamiento, self._salidas_esperadas_entrenamiento)
        self._entradas_entrenamiento = []
        self._salidas_esperadas_entrenamiento = []

    def almacenar(self):
        '''
            Serialize and persist the network
        '''
        pickle.dump(self._nn, open(self.path, 'wb'))

    def cargar(self, path, red):
        '''
            Deserialize an existing network or create a new one
        '''
        self.path = path
        if os.path.isfile(path):
            # If the specified file exists, deserialize the network from it
            self._nn = pickle.load(open(path, 'rb'))
        else:
            # Otherwise, initialize the given network
            self._nn = red
            tableroVacio = ([EnumCasilla.EMPTY.value for _ in range(64)], 0)
            self.agregar_a_entrenamiento([tableroVacio], EnumResultado.EMPATE)
            self.entrenar()
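# A minimal usage sketch for the class above (an assumption; EnumCasilla,
# EnumResultado and the board encoding come from the surrounding project):
ann = Ann()
ann.cargar('red.pkl', MLPRegressor(hidden_layer_sizes=(10,), warm_start=True))
partida = [([EnumCasilla.EMPTY.value for _ in range(64)], 0)]  # one fake position
ann.agregar_a_entrenamiento(partida, EnumResultado.EMPATE)
ann.entrenar()
ann.almacenar()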
Example #36
# Multilayer Perceptron

# General Example
# Note: normalize the data before feeding it to an ANN (see the sketch after this example)

from sklearn.neural_network import MLPRegressor

mlp_reg = MLPRegressor(
    hidden_layer_sizes=10,  # tuple whose length is the number of hidden layers
    # and whose entries give the number of neurons in each layer
    activation='relu',  # can be identity, logistic, tanh, relu
    solver='adam',  # lbfgs, sgd, adam
    learning_rate='constant',  # keep the learning rate fixed
    learning_rate_init=0.01,  # controls the step size when updating weights
    max_iter=1000,  # maximum number of iterations before stopping
    tol=0.0001)  # tolerance for the optimization

mlp_reg.fit(X, y)
mlp_reg.predict(X_dash)
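# Per the note above about normalizing first: a minimal sketch (an assumption)
# wrapping the same regressor in a StandardScaler pipeline; X, y and X_dash
# are assumed from the surrounding script.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaled_mlp = make_pipeline(StandardScaler(), mlp_reg)
scaled_mlp.fit(X, y)
scaled_mlp.predict(X_dash)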

#######################
#Example with sine data
#######################
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt

x = np.arange(0.0, 1, 0.01)  # 100 samples in [0, 1); shape (100,), not yet a column vector
x = x.reshape(-1, 1)  # reshape to 100 x 1, as scikit-learn expects 2-D inputs
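# The sine example is truncated here; a minimal completion sketch (an
# assumption about the intent): learn y = sin(2*pi*x) and plot the fit.
y = np.sin(2 * np.pi * x).ravel()
sine_mlp = MLPRegressor(hidden_layer_sizes=(20, 20), solver='lbfgs',
                        max_iter=5000, random_state=1)
sine_mlp.fit(x, y)
plt.plot(x, y, label='sin(2*pi*x)')
plt.plot(x, sine_mlp.predict(x), label='MLP fit')
plt.legend()
plt.show()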
def get_stacking_model():
	model = MLPRegressor(hidden_layer_sizes=(20,20))
	X_train,y_train,_,_ = get_data()
	model.fit(X_train,y_train)
	return model
Example #38
# 0.3600836547592847
#print(reg.coef_)

from sklearn.metrics import mean_squared_error

score = mean_squared_error(y_test, reg.predict(X_test))
print("linear regression: ", score)
#############################################################


#####################################################################
from sklearn.neural_network import MLPRegressor

nn = MLPRegressor(
    hidden_layer_sizes=(10,),  activation='relu', solver='adam', alpha=0.001, batch_size='auto',
    learning_rate='constant', learning_rate_init=0.01, power_t=0.5, max_iter=1000, shuffle=True,
    random_state=9, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

n = nn.fit(X_train, y_train)

#y_pred = nn.predict(X_train)
scr = nn.score(X_test, y_test)
#print(scr)
score = mean_squared_error(y_test, nn.predict(X_test))
print("MLPregressor: ", score)
#0.3702685509025747
##########################################################################

##########################################
from sklearn.svm import SVR
Example #39
from scipy.optimize import curve_fit
from sklearn.neural_network import MLPRegressor

DEBUG = False

fitted = False
nn_threshold = 0.8 #threshold for updating the learner
metricsNN = [] # metrics for Neural Networks [U,A,Qu,Q,Rt]
coefNN = []  #Coefficients for Neural Networks [c1,c2,c3,c4]
metrics = [] #For nonlinear regression [U,A,Qu,Q]
respTime = [] #For nonlinear regression
monitors = []
currentRsquare = 0.0 # Current best value for Rsquare
currentCoefficients = [0.1,1,0.001,-0.8] # Current best values for coefficients [c1,c2,c3,c4]
defaultCoefficients = [0.1,1,0.001,-0.8]
historic = {'c1':[],'c2':[],'c3':[],'c4':[],'rsquared':[]}

clf = MLPRegressor(solver='lbfgs',alpha=1e-5, random_state=1, activation='tanh',hidden_layer_sizes=(100,5), learning_rate = 'adaptive')

frontpage = """<html>
	<head></head>
	<meta http-equiv="refresh" content="40">
		<body>
			<form method="get" action="addMonitoringData">
				<input type="text" value="" name="name"/>
				<button type="submit">Add a parameter</button>
			</form>
			<table style=\"width:100%\"> 
				<caption>Monitoring Data</caption>
				<tr><td>Normalized Response Time</td><td>Guiltiness</td><td>[U,A,Qu,Q]</td></tr>
			"""

def adjust_coeff(U,A,Qu,Q,Rt):
Example #40
for b in range(len(training_x)):  # number of bootstrapped samples

    Xtrain = training_x[b]
    ytrain = training_y[b]
    Xtest = test_x[b]

    predictions_recall = []

    c = 1
    for p in [20]:
        for q in [15]:

            reg = MLPRegressor(alpha=1e-4,
                               hidden_layer_sizes=(p, q),
                               random_state=1,
                               activation="tanh",
                               batch_size=64,
                               max_iter=500)

            for i in range(21):
                predictions = []
                reg.fit(Xtrain, ytrain[:, i])

                for j in range(len(Xtest)):
                    pred_y_test = reg.predict(Xtest[j].reshape(1, -1))
                    predictions.append(pred_y_test)

                prediction = np.array(predictions).reshape(-1, 1)
                predictions_recall.append(prediction)

                c = c + 1
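    # The snippet ends here; a minimal aggregation sketch (an assumption):
    # stack the 21 per-target prediction columns for this bootstrap sample.
    predictions_recall = np.hstack(predictions_recall)  # shape (len(Xtest), 21)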
Example #41
import numpy as np
from sklearn.datasets import make_regression
from sklearn.neural_network import MLPRegressor


def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # The coefficients will be identical if both do or do not shuffle
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)

        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will differ when one model shuffles and the other does not
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)

    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

from models import models_util

dataset = models_util.load_metadata_dataset()
# Difficulty and BPM input features.
X_train = dataset[:, 0:2]
y = dataset[:, 5]

model = make_pipeline(StandardScaler(),
                      MLPRegressor(hidden_layer_sizes=(10, )))
model.fit(X_train, y)

models_util.save_model(model, models_util.MetadataPredictor.ACCURACY)
print("Trained model saved.")
Example #43
from datetime import datetime

import numpy as np
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPRegressor

startTime = datetime.now()

fileTrain = open("fingerDataTrain.dat",'r')
fileVal = open("fingerDataVal.dat",'r')
trainingSet = np.loadtxt(fileTrain)
valSet = np.loadtxt(fileVal)
fileTrain.close()
fileVal.close()

trainX = trainingSet[:,:13]
trainY = trainingSet[:,14:]
valX = valSet[:,:13]
valY = valSet[:,14:]

for i in range(trainX.shape[1]):
    m = trainX[:,i].mean()
    s = trainX[:,i].std()
    trainX[:,i] = (trainX[:,i]-m)/s
    valX[:,i] = (valX[:,i]-m)/s


ann = MLPRegressor()
ann.fit(trainX,trainY)
sqError = ((ann.predict(valX)-valY)**2).mean()

plt.scatter(valX[:,1], valY[:,3],  color='black')
plt.plot(valX[:,1], ann.predict(valX)[:,3], color='blue', linewidth=3)

plt.show()

print(datetime.now() - startTime)
Example #44
 def initialize_model(self, total_num_actions: int, start_features):
     # One partial_fit call with zero targets initializes the network's weights
     # and output layer so it can be queried before any real training data arrives.
     model = MLPRegressor(hidden_layer_sizes=(1024,), learning_rate="constant")
     model.partial_fit(self._features_to_model_input(start_features), np.zeros((1, total_num_actions)))
     self.model = model
Example #45
    dataframes = [
        demand_weather_14_17.drop(columns=['Date']),
        demand_weather_14_16.drop(columns=['Date']),
        demand_weather_17.drop(columns=['Date'])
    ]
    run_sim_datasets(dataframes, plot=False)

    print('\n\n---- Model Evaluation ----')
    evaluate_models(demand_weather_14_16, demand_weather_17)

    print('\n\n---- Run the Simulator ----')
    # MLP and Linear models performed rather well in evaluate_models, so we test them
    # in the simulation using the parameters obtained from the model evaluation
    mlp = MLPRegressor(alpha=1e-6,
                       hidden_layer_sizes=[10],
                       random_state=0,
                       solver='lbfgs',
                       max_iter=1000000)
    lr = LinearRegression()
    lasso = Lasso(alpha=0.0001)
    ridge = Ridge(alpha=1e-7)
    svr = LinearSVR(C=100)
    models = [lr, lasso, ridge, svr, mlp]
    for model in models:
        run_sim_ml_dataset(model, demand_weather_14_17, plot=False)

# =============================================================================
# With ss=1.1, MLP and Lasso score best. Therefore, I would use Lasso
# regression, which drives most feature coefficients to 0 and is easy to
# explain.
# =============================================================================
# In[79]:


print("Gardient Boosting Features Importance")
headers = ["name", "score"]
values = sorted(zip(x_train.columns, m3.feature_importances_), key=lambda x: x[1]*-1)
print(tabulate(values, headers, tablefmt="plain"))


# In[54]:


from sklearn.neural_network  import MLPRegressor

m2 = MLPRegressor(hidden_layer_sizes=(128, 11), learning_rate_init=0.01, verbose=True, max_iter=1000)
m2.fit(x_train, y_train)


# In[42]:


score = r2_score(y_test,m2.predict(x_test))
print(score)


# In[43]:


sub = pd.DataFrame()
sub['Id'] = test_df['Id']
y3_p = model.predict(X_plot)

'''
model.fit(X4, y4)
y4_p = model.predict(X_plot)
model.fit(X5, y5)
y5_p = model.predict(X_plot)
'''

y6_p = shift(y1_p, -30, cval=0)
y7_p = shift(y1_p, -15, cval=0)
X = np.column_stack([X_plot, y1_p, y2_p, y3_p, y6_p, y7_p])
y = shift(y1_p, 30, cval=0)

#poly = make_pipeline(PolynomialFeatures(3), Ridge())
mpl = MLPRegressor(beta_1=0.99)
'''
y_t = y[-1000:-2]
y = y[0:-1000]
X_t = X[-1000:-2]
X = X[0:-1000]
mpl.fit(X, y)
poly.fit(X, y)
mpl_pred = mpl.predict(X_t)
poly_pred = poly.predict(X_t)
'''
mpl_pred = cross_val_predict(mpl, X, y, cv=10)
#poly_pred = cross_val_predict(poly, X, y, cv=10)
#nn_pred = cross_val_predict(model, X, y, cv=10)
print(mpl.get_params())
Example #48
		("regressor", regressor)
	])
	pipeline.fit(housing_X, housing_y)
	customize(regressor, **kwargs)
	store_pkl(pipeline, name + ".pkl")
	medv = DataFrame(pipeline.predict(housing_X), columns = ["MEDV"])
	if(with_kneighbors == True):
		Xt = pipeline_transform(pipeline, housing_X)
		kneighbors = regressor.kneighbors(Xt)
		medv_ids = DataFrame(kneighbors[1] + 1, columns = ["neighbor(" + str(x + 1) + ")" for x in range(regressor.n_neighbors)])
		medv = pandas.concat((medv, medv_ids), axis = 1)
	store_csv(medv, name + ".csv")

build_housing(AdaBoostRegressor(DecisionTreeRegressor(random_state = 13, min_samples_leaf = 5), random_state = 13, n_estimators = 17), "AdaBoostHousing")
build_housing(KNeighborsRegressor(), "KNNHousing", with_kneighbors = True)
build_housing(MLPRegressor(activation = "tanh", hidden_layer_sizes = (26,), solver = "lbfgs", random_state = 13, tol = 0.001, max_iter = 1000), "MLPHousing")
build_housing(SGDRegressor(random_state = 13), "SGDHousing")
build_housing(SVR(), "SVRHousing")
build_housing(LinearSVR(random_state = 13), "LinearSVRHousing")
build_housing(NuSVR(), "NuSVRHousing")

#
# Anomaly detection
#

def build_iforest_housing_anomaly(iforest, name, **kwargs):
	mapper = DataFrameMapper([
		(housing_X.columns.values, ContinuousDomain())
	])
	pipeline = PMMLPipeline([
		("mapper", mapper),
training_data_input = ss_x.fit_transform(training_data_input)  # estimate each feature's mean and standard deviation
test_data_input = ss_x.transform(
    test_data_input)  # note: standardize the test set with the same parameters so it stays comparable with the training set

ss_y = preprocessing.StandardScaler()
training_data_output = ss_y.fit_transform(training_data_output)
test_data_output = ss_y.transform(test_data_output)  # turns test_y_disorder into a single column

n_folds = 6  # number of cross-validation folds
model_br = BayesianRidge()  # Bayesian ridge regression model
model_lr = LinearRegression()  # ordinary linear regression model
model_etc = ElasticNet()  # elastic net regression model
model_svr = SVR()  # support vector regression model
model_gbr = GradientBoostingRegressor()  # gradient boosting regression model
model_mlp = MLPRegressor(solver='lbfgs',
                         hidden_layer_sizes=(20, 20, 20),
                         random_state=1)
model_names = [
    'BayesianRidge', 'LinearRegression', 'ElasticNet', 'SVR', 'GBR', 'MLP'
]  # names of the different models
model_dic = [model_br, model_lr, model_etc, model_svr, model_gbr,
             model_mlp]  # collection of the regression model objects

cv_score_list = []  # list of cross-validation results
pre_y_list = []  # list of y values predicted by each regression model
for model in model_dic:  # for each regression model object
    # run each regression model through cross-validation
    scores = cross_val_score(model,
                             training_data_input,
                             training_data_output.ravel(),
                             cv=n_folds)
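    # The original loop is truncated here; a minimal completion sketch (an
    # assumption): record the fold scores, then fit and predict for comparison.
    cv_score_list.append(scores)
    model.fit(training_data_input, training_data_output.ravel())
    pre_y_list.append(model.predict(training_data_input))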
Example #50
import numpy as np
from sklearn.neural_network import MLPRegressor


class QN(object):
    def __init__(self, num_inputs, num_outputs):
        self.nx = num_inputs
        self.ny = num_outputs
        self.net = MLPRegressor(hidden_layer_sizes=(50, 10),
                                max_iter=1,
                                solver='sgd',  # 'algorithm' was renamed 'solver' in scikit-learn 0.18
                                learning_rate='constant',
                                learning_rate_init=0.001,
                                warm_start=True,
                                momentum=0.9,
                                nesterovs_momentum=True)

        self.initialize_network()

        # set experience replay
        self.mbsize = 128  # mini-batch size
        self.er_s = []
        self.er_a = []
        self.er_r = []
        self.er_done = []
        self.er_sp = []

        self.er_size = 2000  # total size of the buffer, implemented as a circular queue
        self.whead = 0  # write head

    def initialize_network(self):
        # function to initialize network weights
        xtrain = np.random.rand(256, self.nx)
        ytrain = 10 + np.random.rand(256, self.ny)
        self.net.fit(xtrain, ytrain)

    def update_network(self):
        # function updates network by sampling a mini-batch from the ER
        # Prepare train data
        chosen = list(
            np.random.randint(len(self.er_s),
                              size=min(len(self.er_s), self.mbsize)))
        Xtrain = np.asarray([self.er_s[i] for i in chosen])
        # calculate target
        target = np.random.rand(len(chosen), self.ny)

        for j, i in enumerate(chosen):
            # do a forward pass through s and sp
            Q_s = self.net.predict(self.er_s[i].reshape(1, -1))
            Q_sp = self.net.predict(self.er_sp[i].reshape(1, -1))
            target[j, :] = Q_s  # target initialized to current prediction

            if (self.er_done[i] == True):
                target[j, self.er_a[i]] = self.er_r[
                    i]  # if end of episode, target is terminal reward
            else:
                target[j, self.er_a[i]] = self.er_r[i] + 0.9 * max(
                    max(Q_sp))  # predict returns a (1, ny) array for multi-output regression

        # fit the network
        self.net.fit(Xtrain, target)  # single step of SGD

    def append_memory(self, s, a, r, sp, done):
        if (len(self.er_s) < self.er_size):
            self.er_s.append(s)
            self.er_a.append(a)
            self.er_r.append(r)
            self.er_sp.append(sp)
            self.er_done.append(done)
            self.whead = (self.whead + 1) % self.er_size
        else:
            self.er_s[self.whead] = s
            self.er_a[self.whead] = a
            self.er_r[self.whead] = r
            self.er_sp[self.whead] = sp
            self.er_done[self.whead] = done
            self.whead = (self.whead + 1) % self.er_size
from sklearn.neural_network import MLPRegressor
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error



data = pd.read_csv('network_backup_dataset.csv')
train = data.loc[:,['WeekNumber','DayofWeek','BackupStartTime','WorkFlowID','FileName','BackupTime']]
target = data.loc[:,['SizeofBackup']]
mlp = MLPRegressor(solver='sgd',  # 'algorithm' was renamed 'solver' in scikit-learn 0.18
                   hidden_layer_sizes=150,
                   max_iter=200, shuffle=False, random_state=1)

mlp.fit(train, target.values.ravel())  # ravel the single-column DataFrame target to 1-D
prediction = mlp.predict(train)

plt.plot(prediction,label='Prediction',color='red')
plt.plot(target,label='Real Data',color='blue')
plt.title('Copy Size versus Time based on Neural Network Regression')
plt.xlabel('Time')
plt.ylabel('Copy Size')
plt.legend()
plt.show()

rmse = mean_squared_error(target.SizeofBackup,prediction)**0.5
print (rmse)
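# In newer scikit-learn this is a one-liner (an assumption about the installed
# version): rmse = mean_squared_error(target.SizeofBackup, prediction, squared=False)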
#KNN
from sklearn.neighbors import KNeighborsRegressor
KNN = KNeighborsRegressor()
knn_param_grid = {'n_neighbors':[3,10]}
knn_grid = model_selection.GridSearchCV(KNN, knn_param_grid, cv=10, n_jobs=25, verbose=1, scoring='neg_mean_squared_error')
knn_grid.fit(X_train, y_train)
print(' Best  Params:' + str(knn_grid.best_params_))
KNN = KNeighborsRegressor(n_neighbors=10)
KNN.fit(X_train, y_train)
y_predict_knn=KNN.predict(X_test)
mae_knn=(np.abs(y_predict_knn-y_test)).sum()/9467
joblib.dump(KNN, 'KNN.model')
print(mae_knn)
#mlp
from sklearn.neural_network import MLPRegressor
MLP = MLPRegressor(hidden_layer_sizes=(300, 200,200),max_iter=100,activation='relu')
MLP.fit(X_train, y_train)
y_predict_MLP=MLP.predict(X_test)
mae_MLP=(np.abs(y_predict_MLP-y_test)).sum()/9467
joblib.dump(MLP, 'MLP.model')
print(mae_MLP)
#xgb
import xgboost  as xgb
x_regress = xgb.XGBRegressor(max_depth=20,n_estimators =5000)
x_regress_param_grid = {'max_depth': [5,20]}
x_regress_grid = model_selection.GridSearchCV(x_regress, x_regress_param_grid, cv=10, n_jobs=25, verbose=1, scoring='neg_mean_squared_error')
x_regress_grid.fit(X_train, y_train)  # fit the grid search (the original fit the bare regressor, leaving the grid unused)
print(' Best  Params:' + str(x_regress_grid.best_params_))
joblib.dump(x_regress_grid, 'x_regress_grid.model')
y_predict_xgb=x_regress_grid.predict(X_test)

mae_xgb=(np.abs(y_predict_xgb-y_test)).sum()/9467
Example #54
print(clf3.score(input.T[384:],targets[384:]))

from sklearn import ensemble

clf4=ensemble.RandomForestRegressor(random_state=0)
clf4.fit(input.T[0:384],targets[0:384])

print(clf4.score(input.T[384:],targets[384:]))
import matplotlib.pyplot as plt

parameters={ 'min_samples_split':[2,10], 'max_depth':[3, 15],'n_estimators':[10,50]}
from sklearn import model_selection  # 'grid_search' was merged into model_selection in scikit-learn 0.18
###### Step 5 Testing With Neural Networks
from sklearn.neural_network import MLPRegressor

clf5 = MLPRegressor(random_state=0, hidden_layer_sizes=500, activation='logistic', max_iter=500)
clf5.fit(input.T[0:384],targets[0:384])
pred=clf5.predict(input.T[384:])
pred_train=clf5.predict(input.T[0:384])
print(clf5.score(input.T[384:],targets[384:]))
print(clf5.get_params())

### Plotting the results

plt.figure(1)
plt.plot(range(0,len(targets[384:])),pred,'red',range(0,len(targets[384:])),targets[384:],'blue')
plt.figure(2)
plt.plot(range(0,len(targets[0:384])),pred_train,'red',range(0,len(targets[0:384])),targets[0:384],'blue')
plt.show()
import joblib  # sklearn.externals.joblib was removed from newer scikit-learn
Example #55
    "Random Forest",
    "Neural Net",
    "AdaBoost",
    # "Naive Bayes",
    # "QDA",
    "Gessaman",
]

regressors = [
    KNeighborsRegressor(3),
    SVR(kernel="linear", C=0.025),
    SVR(gamma=2, C=1),
    # GaussianProcessRegressor(1.0 * RBF(1.0)),
    DecisionTreeRegressor(max_depth=5),
    RandomForestRegressor(max_depth=5, n_estimators=20, max_features=1),
    MLPRegressor(alpha=1),
    AdaBoostRegressor(),
    # GaussianNB(),
    # QuadraticDiscriminantAnalysis(),
    Gessaman(nb_jobs=-1),
]

# X, y = make_classification(
#     n_features=2,
#     n_redundant=0,
#     n_informative=2,
#     random_state=1,
#     n_clusters_per_class=1,
#     n_samples=n_samples,
# )
# rng = np.random.RandomState(2)
Example #56
geno = np.load('genodata.npy')
pheno = np.load('phenodata.npy')

X_tr = geno[:1000,1:]   #slicing geno 
#X_va = geno[201:250,:]
X_te = geno[1001:,1:]
Y_tr = pheno[:1000,1:]   #slicing pheno
#Y_va = pheno[201:250,:]
Y_te = pheno[1001:,1:]

diabetes_X_train = X_tr
diabetes_X_test = X_te
diabetes_y_train = Y_tr
diabetes_y_test = Y_te

reg = MLPRegressor(hidden_layer_sizes=(1,), solver='lbfgs')  # 'algorithm' was renamed 'solver' in scikit-learn 0.18
reg.fit(X_tr,Y_tr)

scores = cross_val_score(reg,geno[:,1:],pheno[:,1:],cv=10)

#Result_Y = np.zeros((249,1), dtype='float64')
Result_Y = reg.predict(X_te)
#Yte = np.array(Y_te, dtype=np.float64) 
r_row,p_score = pearsonr(Result_Y,Y_te)

# The mean square error
print("Residual sum of squares: %.2f"
      % np.mean((reg.predict(diabetes_X_test) - diabetes_y_test) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % reg.score(diabetes_X_test, diabetes_y_test))
print(Result_Y)
Example #57
 GradientBoostingRegressor(random_state=50),
 linear_model.HuberRegressor(),
 KNeighborsRegressor(),
 KernelRidge(),
 linear_model.Lars(),
 linear_model.LarsCV(),
 linear_model.Lasso(),
 linear_model.LassoCV(),
 linear_model.LassoLars(),
 linear_model.LassoLarsCV(),
 linear_model.LassoLarsIC(),
 linear_model.LinearRegression(),
 LinearSVR(),
 #linear_model.LogisticRegression(),
 #linear_model.LogisticRegressionCV(),
 MLPRegressor(),
 #linear_model.ModifiedHuber(),
 #linear_model.MultiTaskElasticNet(),
 #linear_model.MultiTaskElasticNetCV(),
 #linear_model.MultiTaskLasso(),
 #linear_model.MultiTaskLassoCV(),
 NuSVR(),
 linear_model.OrthogonalMatchingPursuit(),
 linear_model.OrthogonalMatchingPursuitCV(),
 PLSCanonical(),
 PLSRegression(),
 linear_model.PassiveAggressiveRegressor(),
 linear_model.RANSACRegressor(),
 RadiusNeighborsRegressor(),
 RandomForestRegressor(),
 #linear_model.RandomizedLasso(),
Example #58
#Example  with a Regressor using the scikit-learn library
# example for the XOr gate
from sklearn.neural_network import MLPRegressor 

X = [[0., 0.],[0., 1.], [1., 0.], [1., 1.]] # each one of the entries 00 01 10 11
y = [0, 1, 1, 0] # outputs for each one of the entries

# check http://scikit-learn.org/dev/modules/generated/sklearn.neural_network.MLPRegressor.html#sklearn.neural_network.MLPRegressor
#for more details
reg = MLPRegressor(hidden_layer_sizes=(5), activation='tanh', solver='sgd', alpha=0.001, learning_rate='constant',
                   max_iter=10000, random_state=None, verbose=False, warm_start=False, momentum=0.8, tol=10e-8, shuffle=False)

reg.fit(X, y)

outp = reg.predict([[0., 0.], [0., 1.], [1., 0.], [1., 1.]])

print('Results:')
print('0 0 0:', outp[0])
print('0 1 1:', outp[1])
print('1 0 1:', outp[2])
print('1 1 0:', outp[3])  # was outp[0] in the original -- an indexing bug
print('Score:', reg.score(X, y))
def get_mlp_regressor(num_hidden_units=51):
    mlp = MLPRegressor(hidden_layer_sizes=num_hidden_units)
    return [mlp], ['Multi-Layer Perceptron']
Example #60
from sklearn.neural_network import MLPRegressor


def regression(N, P):
    assert len(N) == len(P)

    clf = MLPRegressor(hidden_layer_sizes=(15,), activation='relu', solver='adam', alpha=0.0001)

    clf.fit(N, P)
    return clf
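# A minimal usage sketch with toy data (an assumption; the real N and P come
# from the surrounding project):
import numpy as np

toy_N = np.linspace(0, 1, 50).reshape(-1, 1)
toy_P = toy_N.ravel() ** 2
model = regression(toy_N, toy_P)
print(model.predict([[0.5]]))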