Example #1
    def train(self, X, y=None):
        '''
        :param X: training data
        :param y: dependent variables (responses)
        :return: Linear Regression model object
        '''

        # Training data and responses
        Input = IInput.HomogenousDaalData(X).getNumericTable()
        Responses = IInput.HomogenousDaalData(y).getNumericTable()

        # Create the training algorithm, using the requested solver method if one was given
        if self.method:
            linear_training_algorithm = linear_training.Batch(
                method=self.method)
        else:
            linear_training_algorithm = linear_training.Batch()

        # set input values
        linear_training_algorithm.input.set(linear_training.data, Input)
        linear_training_algorithm.input.set(linear_training.dependentVariables,
                                            Responses)
        # enable the intercept term if it was requested in the parameters
        linear_training_algorithm.parameter.interceptFlag = \
            'intercept' in self.parameters
        # calculate
        res = linear_training_algorithm.compute()
        # return trained model
        self.model = res.get(linear_training.model)
        return self.model
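The IInput.HomogenousDaalData wrapper used above is not part of this snippet. A minimal stand-in, assuming the input is a numpy array that should be exposed as a DAAL numeric table (the class body below is an assumption, not the original wrapper), could look like:

    import numpy as np
    from daal.data_management import HomogenNumericTable

    class HomogenousDaalData:
        """Hypothetical stand-in for IInput.HomogenousDaalData."""

        def __init__(self, array):
            # HomogenNumericTable expects a contiguous 2-D float array,
            # so coerce 1-D input into a single column
            data = np.ascontiguousarray(array, dtype=np.float64)
            if data.ndim == 1:
                data = data.reshape(-1, 1)
            self._data = data

        def getNumericTable(self):
            # wrap the numpy array as a DAAL numeric table
            return HomogenNumericTable(self._data)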
Example #2
    def test_linear_regression_simple():

        # train on responses equal to the inputs, so the expected beta
        # coefficients are intercept 0 and slope 1
        x = np.array([0., 2., 3.]).reshape(3, 1)

        nt_x = nt_y = HomogenNumericTable(x)

        lr_alg = linear_training.Batch(method=linear_training.qrDense)
        lr_alg.input.set(linear_training.data, nt_x)
        lr_alg.input.set(linear_training.dependentVariables, nt_y)
        result = lr_alg.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta_coeff = getNumpyArray(beta_coeff)

        res_beta_coeff = np.array([0, 1]).reshape(1, 2)

        assert_almost_equal(res_beta_coeff, np_beta_coeff)

        # predict
        lr_alg_predict = linear_prediction.Batch()
        lr_alg_predict.input.setModel(linear_prediction.model, model)
        lr_alg_predict.input.setTable(linear_prediction.data, nt_x)
        result = lr_alg_predict.compute()
        np_predict = getNumpyArray(result.get(linear_prediction.prediction))
        assert_array_almost_equal(x, np_predict)
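The getNumpyArray helper used in these tests is not shown. A possible sketch, assuming it copies a DAAL numeric table back into a numpy array through a block descriptor (the usual pydaal pattern), is:

    import numpy as np
    from daal.data_management import BlockDescriptor, readOnly

    def getNumpyArray(numeric_table):
        # read all rows of the table into a block and copy them out
        block = BlockDescriptor()
        numeric_table.getBlockOfRows(0, numeric_table.getNumberOfRows(),
                                     readOnly, block)
        array = np.copy(block.getArray())
        numeric_table.releaseBlockOfRows(block)
        return array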
Example #3
    def training(self, trainData, trainDependentVariables):

        if self.method == 'normEq':
            method = training.normEqDense
        elif self.method == 'qr':
            method = training.qrDense
        else:
            warnings.warn('Invalid method, using default dense Normal Equation method')
            method = training.normEqDense
        algorithm = training.Batch(method=method, fptype=self.dtype)

        # Pass a training data set and dependent values to the algorithm
        algorithm.input.set(training.data, trainData)
        algorithm.input.set(training.dependentVariables, trainDependentVariables)
        algorithm.parameter.interceptFlag = self.interceptFlag

        # Build linear regression model and retrieve the algorithm results
        trainingResult = algorithm.compute()
        return trainingResult
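A possible way to drive this method, assuming it lives on a wrapper class whose constructor stores method, dtype, and interceptFlag (the DaalLinearRegression name and its constructor are illustrative, not part of the snippet):

    import numpy as np
    from daal.data_management import HomogenNumericTable
    from daal.algorithms.linear_regression import training

    # hypothetical driver code
    x = np.random.rand(20, 3)
    y = np.random.rand(20, 1)
    trainData = HomogenNumericTable(x)
    trainDependentVariables = HomogenNumericTable(y)

    regressor = DaalLinearRegression(method='qr', dtype=np.float64,
                                     interceptFlag=True)
    trainingResult = regressor.training(trainData, trainDependentVariables)

    # the trained model and its coefficients come out of the training result
    model = trainingResult.get(training.model)
    beta = model.getBeta()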
Example #4
    def get_daal_prediction(x=np.array([1, 2, 3]), y=np.array([1, 2, 3])):
        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)

        lr_predict = linear_prediction.Batch()
        lr_predict.input.setModel(linear_prediction.model, model)
        lr_predict.input.setTable(linear_prediction.data, ntX)
        result = lr_predict.compute()

        np_predicted = getNumpyArray(result.get(linear_prediction.prediction))
        # assert the same as the initial dependent variable
        assert_array_almost_equal(y, np_predicted)
        return np_predicted
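For instance, calling the helper with an exactly linear relationship should reproduce the responses (a usage sketch, assuming the imports used above):

    x = np.array([1., 2., 3.]).reshape(3, 1)
    y = np.array([2., 4., 6.]).reshape(3, 1)
    # predicted should match y to within numerical tolerance,
    # since y is an exact linear function of x
    predicted = get_daal_prediction(x, y)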
Example #5
    def test_coeff_size(rows=10, columns=9):
        '''
        The number of beta coefficients (with the intercept flag on)
        is the same as the number of rows in the data sample.
        '''
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)

        assert y.transpose().shape == np_beta.shape, \
            "Dependent variables must have the same shape as the beta coefficients"
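The get_random_array helper is also not shown in these snippets. A minimal stand-in, assuming it returns a (data, responses) pair with a single response column, might be:

    import numpy as np

    def get_random_array(rows=10, columns=9):
        # hypothetical helper: random training data and one response column
        x = np.random.rand(rows, columns)
        y = np.random.rand(rows, 1)
        return (x, y)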
Example #6
    def test_intercept_flag(rows=10, columns=9):
        inout = get_random_array(rows, columns)
        test_overfitting(rows, columns)
        x = inout[0]
        y = inout[1]

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        result = lr_train.compute()
        model = result.get(linear_training.model)
        beta_coeff = model.getBeta()
        np_beta = getNumpyArray(beta_coeff)
        daal_intercept = np_beta[0, 0]

        from sklearn.linear_model import LinearRegression as ScikitLinearRegression
        regression = ScikitLinearRegression()
        regression.fit(x, y)

        scikit_intercept = regression.intercept_
        assert_array_almost_equal(scikit_intercept, [daal_intercept])
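The same comparison can be extended beyond the intercept. With the default rows=10 and columns=9 the fit is exactly determined (one parameter per sample once the intercept is included), so DAAL and scikit-learn should recover the same coefficients. A sketch, assuming the helpers above and the intercept-in-column-0 layout of getBeta() that the test above already relies on:

    def test_beta_against_scikit(rows=10, columns=9):
        x, y = get_random_array(rows, columns)

        ntX = HomogenNumericTable(x)
        ntY = HomogenNumericTable(y)

        lr_train = linear_training.Batch()
        lr_train.input.set(linear_training.data, ntX)
        lr_train.input.set(linear_training.dependentVariables, ntY)
        model = lr_train.compute().get(linear_training.model)
        np_beta = getNumpyArray(model.getBeta())

        from sklearn.linear_model import LinearRegression
        regression = LinearRegression()
        regression.fit(x, y)

        # column 0 holds the intercept, the remaining columns hold the
        # per-feature coefficients
        assert_array_almost_equal(regression.intercept_, np_beta[:, 0])
        assert_array_almost_equal(regression.coef_, np_beta[:, 1:])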