Пример #1
0
def test_conf_int_single_regressor():
    # GH#706 single-regressor model (i.e. no intercept) with 1D exog
    # should get passed to DataFrame for conf_int
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()
    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Пример #2
0
def test_706():
    # make sure one regressor pandas Series gets passed to DataFrame
    # for conf_int.
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()
    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Пример #3
0
def test_706():
    # make sure one regressor pandas Series gets passed to DataFrame
    # for conf_int.
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y,x).fit()
    conf_int = res.conf_int()
    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Пример #4
0
def test_conf_int_single_regressor():
    # GH#706 single-regressor model (i.e. no intercept) with 1D exog
    # should get passed to DataFrame for conf_int
    y = pandas.Series(np.random.randn(10))
    x = pandas.Series(np.ones(10))
    res = OLS(y, x).fit()
    conf_int = res.conf_int()
    np.testing.assert_equal(conf_int.shape, (1, 2))
    np.testing.assert_(isinstance(conf_int, pandas.DataFrame))
Пример #5
0
def create_linear_model(X_train, X_test, Y_train, Y_test):
    ''' TODO...
        - Predict the wine quality using the test set and compare the accuracy to the actual quality. Comment.
        - Print the parameter estimates and their 95% confidence intervals in a single table. (Suggest using
          confint()), and cbind()
    '''

    X_train = add_constant(X_train)
    regressionResult = OLS(Y_train, X_train).fit()
    print(regressionResult.summary())

    # Print various attributes of the OLS fitted model
    # print("R Squared: {}".format(regressionResult.rsquared))
    # print("SSE: {}".format(regressionResult.ess))
    # print("SSR: {}".format(regressionResult.ssr))
    # print("Residual MSE: {}".format(regressionResult.mse_resid))
    # print("Total MSE: {}".format(regressionResult.mse_total))
    # print("Model MSE: {}".format(regressionResult.mse_model))
    # print("F-Value: {}".format(regressionResult.mse_model/regressionResult.mse_resid))
    # print("NOBS: {}".format(regressionResult.nobs))
    # print("Centered TSS: {}".format(regressionResult.centered_tss))
    # print("Uncentered TSS: {}".format(regressionResult.uncentered_tss))
    # print("DF Model: {}".format(regressionResult.df_model))
    # print("DF Resid: {}".format(regressionResult.df_resid))
    # print("Standard Errors: {}".format(regressionResult.bse))
    print("Confidence: {}".format(regressionResult.conf_int()))

    predictions = regressionResult.predict(X_train)

    nobs, p = X_train.shape
    eaic = extractAIC(nobs, p, Y_train, predictions)
    print("Extract AIC: {}".format(eaic))

    params = regressionResult.params

    # n, p = X_test.shape
    # X_test = add_constant(X_test)
    # predictions = X_test.dot(params).reshape(n,1)

    # num_matches = 0
    # for i in range(len(Y_test)):
    #     p = int(round(predictions[i][0], 0))
    #     is_match = (Y_test[i] == p)

    #     if is_match:
    #         num_matches += 1

    #     print("Actual: {}, Predictions: {}... Match: {}".format(Y_test[i], p, is_match))

    # print("Number of matches: {}, Total number of Instances: {}".format(num_matches, n))
    # print("Percent correct guesses: {}%".format(round((num_matches/n)*100, 3)))

    return params
class UnivariateLinearModelAnalysis():

    """
    Linear regression analysis with residuals hypothesis tests.
    
    **Available constructors:**

    UnivariateLinearModelAnalysis(*inputSample, outputSample*)

    UnivariateLinearModelAnalysis(*inputSample, outputSample, noiseThres,
    saturationThres, resDistFact, boxCox*)

    Parameters
    ----------
    inputSample : 2-d sequence of float
        Vector of the defect sizes, of dimension 1.
    outputSample : 2-d sequence of float
        Vector of the signals, of dimension 1.
    noiseThres : float
        Value for low censored data. Default is None.
    saturationThres : float
        Value for high censored data. Default is None.
    resDistFact : :py:class:`openturns.DistributionFactory`
        Distribution hypothesis followed by the residuals. Default is 
        :py:class:`openturns.NormalFactory`.
    boxCox : bool or float
        Enable or not the Box Cox transformation. If boxCox is a float, the Box
        Cox transformation is enabled with the given value. Default is False.

    Notes
    -----
    This method automatically :

    - computes the Box Cox parameter if *boxCox* is True,
    - computes the transformed signals if *boxCox* is True or a float,
    - builds the univariate linear regression model on the data,
    - computes the linear regression parameters for censored data if needed,
    - computes the residuals,
    - runs all hypothesis tests.

    Examples
    --------
    Generate data :

    >>> import openturns as ot
    >>> import otpod
    >>> N = 100
    >>> ot.RandomGenerator.SetSeed(0)
    >>> defectDist = ot.Uniform(0.1, 0.6)
    >>> epsilon = ot.Normal(0, 1.9)
    >>> defects = defectDist.getSample(N)
    >>> signalsInvBoxCox = defects * 43. + epsilon.getSample(N) + 2.5
    >>> invBoxCox = ot.InverseBoxCoxTransform(0.3)
    >>> signals = invBoxCox(signalsInvBoxCox)
    
    Run analysis with gaussian hypothesis on the residuals :

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, boxCox=True)
    >>> print analysis.getIntercept() # get intercept value
    [Intercept for uncensored case : 2.51037]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.835529]
    
    Run analysis with noise and saturation threshold :

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, 60., 1700., boxCox=True)
    >>> print analysis.getIntercept() # get intercept value for uncensored and censored case
    [Intercept for uncensored case : 4.28758, Intercept for censored case : 3.11243]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.346827, Kolmogorov p-value for censored case : 0.885006]

    Run analysis with a Weibull distribution hypothesis on the residuals

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, 60., 1700., ot.WeibullFactory(), boxCox=True)
    >>> print analysis.getIntercept() # get intercept value for uncensored and censored case
    [Intercept for uncensored case : 4.28758, Intercept for censored case : 3.11243]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.476036, Kolmogorov p-value for censored case : 0.71764]
    """

    def __init__(self, inputSample, outputSample, noiseThres=None,
                 saturationThres=None, resDistFact=None,
                 boxCox=False):

        self._inputSample = ot.NumericalSample(np.vstack(inputSample))
        self._outputSample = ot.NumericalSample(np.vstack(outputSample))
        self._noiseThres = noiseThres
        self._saturationThres = saturationThres
        # Add flag to tell if censored data must taken into account or not.
        if noiseThres is not None or saturationThres is not None:
            # flag to tell censoring is enabled
            self._censored = True
            # Results instances are created for both cases.
            self._resultsCens = _Results()
            self._resultsUnc = _Results()
        else:
            self._censored = False
            # Results instance is created only for uncensored case.
            self._resultsUnc = _Results()

        if resDistFact is None:
            # default is NormalFactory
            self._resDistFact = ot.NormalFactory()
        else:
            self._resDistFact = resDistFact

        # if Box Cox is a float the transformation is enabled with the given value
        if type(boxCox) is float:
            self._lambdaBoxCox = boxCox
            self._boxCox = True
        else:
            self._lambdaBoxCox = None
            self._boxCox = boxCox

        self._size = self._inputSample.getSize()
        self._dim = self._inputSample.getDimension()

        # Assertions on parameters
        assert (self._size >=3), "Not enough observations."
        assert (self._size == self._outputSample.getSize()), \
                "InputSample and outputSample must have the same size."
        assert (self._dim == 1), "Dimension of inputSample must be 1."
        assert (self._outputSample.getDimension() == 1), "Dimension of outputSample must be 1."

        # run the analysis
        self._run()
        # print warnings
        self._printWarnings()

    def _run(self):
        """
        Run the analysis :
        - Computes the Box Cox parameter if *boxCox* is True,
        - Computes the transformed signals if *boxCox* is True or a float,
        - Builds the univariate linear regression model on the data,
        - Computes the linear regression parameters for censored data if needed,
        - Computes the residuals,
        - Runs all hypothesis tests.
        """

        #################### Filter censored data ##############################
        if self._censored:
            # Filter censored data
            # Returns:
            # defects in the non censored area
            # defectsNoise in the noisy area
            # defectsSat in the saturation area
            # signals in the non censored area
            # check if one the threshold is None
            defects, defectsNoise, defectsSat, signals = \
                DataHandling.filterCensoredData(self._inputSample, self._outputSample,
                              self._noiseThres, self._saturationThres)
        else:
            defects, signals = self._inputSample, self._outputSample
        
        defectsSize = defects.getSize()

        ###################### Box Cox transformation ##########################
        # Compute Box Cox if enabled
        if self._boxCox:
            if self._lambdaBoxCox is None:
                # optimization required, get optimal lambda and graph
                self._lambdaBoxCox, self._graphBoxCox = computeBoxCox(defects, signals)

            # Transformation of data
            boxCoxTransform = ot.BoxCoxTransform([self._lambdaBoxCox])
            signals = boxCoxTransform(signals)
            if self._noiseThres is not None:
                noiseThres = boxCoxTransform([self._noiseThres])[0]
            else:
                noiseThres = self._noiseThres
            if self._saturationThres is not None:
                saturationThres = boxCoxTransform([self._saturationThres])[0]
            else:
                saturationThres = self._saturationThres
        else:
            noiseThres = self._noiseThres
            saturationThres = self._saturationThres

        ######################### Linear Regression model ######################
        # Linear regression with statsmodels module
        # Create the X matrix : [1, inputSample]
        X = ot.NumericalSample(defectsSize, [1, 0])
        X[:, 1] = defects
        self._algoLinear = OLS(np.array(signals), np.array(X)).fit()

        self._resultsUnc.intercept = self._algoLinear.params[0]
        self._resultsUnc.slope = self._algoLinear.params[1]
        # get standard error estimates (residuals standard deviation)
        self._resultsUnc.stderr = np.sqrt(self._algoLinear.scale)
        # get confidence interval at level 95%
        self._resultsUnc.confInt = self._algoLinear.conf_int(0.05)

        if self._censored:
            # define initial starting point for MLE optimization
            initialStartMLE = [self._resultsUnc.intercept, self._resultsUnc.slope,
                               self._resultsUnc.stderr]
            # MLE optimization
            res = computeLinearParametersCensored(initialStartMLE, defects,
                defectsNoise, defectsSat, signals, noiseThres, saturationThres)
            self._resultsCens.intercept = res[0]
            self._resultsCens.slope = res[1]
            self._resultsCens.stderr = res[2]

        ############################ Residuals #################################
        # get residuals from algoLinear
        self._resultsUnc.residuals = ot.NumericalSample(np.vstack(self._algoLinear.resid))
        # compute residuals distribution
        self._resultsUnc.resDist = self._resDistFact.build(self._resultsUnc.residuals)

        if self._censored:
            # create linear model function for censored case
            def CensLinModel(x):
                return self._resultsCens.intercept + self._resultsCens.slope * x

            # compute the residuals for the censored case.
            self._resultsCens.fittedSignals = CensLinModel(defects)
            self._resultsCens.residuals = signals - self._resultsCens.fittedSignals
            # compute residuals distribution.
            self._resultsCens.resDist = self._resDistFact.build(self._resultsCens.residuals)

        ########################## Compute tests ###############################
        self._resultsUnc.testResults = \
                self._computeTests(defects, signals, self._resultsUnc.residuals,
                                   self._resultsUnc.resDist)

        if self._censored:
            self._resultsCens.testResults = \
                self._computeTests(defects, signals, self._resultsCens.residuals,
                                   self._resultsCens.resDist)

        ################ Build the result lists to be printed ##################
        self._buildPrintResults()


################################################################################
###################### Hypothesis and validation tests #########################
################################################################################

    def _computeTests(self, defects, signals, residuals, resDist):

        testResults = {}
        # compute R2
        testResults['R2'] = computeR2(signals, residuals)

        # compute Anderson Darling test (normality test)
        testAnderDar = ot.NormalityTest.AndersonDarlingNormal(residuals)
        testResults['AndersonDarling'] = testAnderDar.getPValue()

        # compute Cramer Von Mises test (normality test)
        testCramVM = ot.NormalityTest.CramerVonMisesNormal(residuals)
        testResults['CramerVonMises'] = testCramVM.getPValue()

        # compute zero residual mean test
        testResults['ZeroMean'] = computeZeroMeanTest(residuals)

        # compute Kolmogorov test (fitting test)
        if LooseVersion(ot.__version__) == '1.6':
            testKol = ot.FittingTest.Kolmogorov(residuals, resDist, 0.95,
                                            resDist.getParametersNumber())
        elif LooseVersion(ot.__version__) > '1.6':
            testKol = ot.FittingTest.Kolmogorov(residuals, resDist, 0.95,
                                            resDist.getParameterDimension())

        testResults['Kolmogorov'] = testKol.getPValue()

        # compute Breusch Pagan test (homoskedasticity : constant variance)
        testResults['BreuschPagan'] = computeBreuschPaganTest(defects, residuals)

        # compute Harrison McCabe test (homoskedasticity : constant variance)
        testResults['HarrisonMcCabe'] = computeHarrisonMcCabeTest(residuals)

        # compute Durbin Watson test (autocorrelation == 0)
        testResults['DurbinWatson'] = computeDurbinWatsonTest(defects, residuals)

        return testResults

################################################################################
########################## Print and save results ##############################
################################################################################

    def getResults(self):
        """
        Print results of the linear analysis.
        """
        # Enable warning to be displayed
        ot.Log.Show(ot.Log.WARN)

        regressionResult = '\n'.join(['{:<47} {:>13} {:>13}'.format(*line) for
                            line in self._dataRegression])

        residualsResult = '\n'.join(['{:<47} {:>13} {:>13}'.format(*line) for 
                            line in self._dataResiduals])

        ndash = 80
        results = '-' * ndash + '\n'
        results = results + '         Linear model analysis results' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + regressionResult + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '         Residuals analysis results' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + residualsResult + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '' + '\n'
        # print warnings if not empty
        if self._printWarnings(False).count('') != len(self._printWarnings(False)):
            results = results + 'Warning : ' + '\nWarning : '.join(['{}'.format(line) for 
                            line in self._printWarnings(False) if len(line)>0])
        return results

    def _printWarnings(self, disp=True):
        # Check results and display warnings

        valuesUnc = np.array(list(self._resultsUnc.testResults.values()))
        if self._censored:
            valuesCens = np.array(list(self._resultsCens.testResults.values()))
            testPValues = ((valuesUnc < 0.05).any() or (valuesCens < 0.05).any())
        else:
            testPValues = (valuesUnc < 0.05).any()

        # print warning if some pValues are less than 0.05
        msg = ["", "", ""]
        if testPValues and not self._boxCox:
            msg[0] = 'Some hypothesis tests failed : you may consider to use '+\
                        'the Box Cox transformation.'
            if disp:
                logging.warn(msg[0])
                # ot.Log.Warn(msg[0])
                # ot.Log.Flush()
        elif testPValues and self._boxCox:
            msg[1] = 'Some hypothesis tests failed : you may consider to use '+\
                'quantile regression or kriging (if input dimension > 1) to build POD.'
            if disp:
                logging.warn(msg[1])
                # ot.Log.Warn(msg[1])
                # ot.Log.Flush()

        if self._resultsUnc.resDist.getClassName() != 'Normal':
            msg[2] = 'Confidence interval, Normality tests and zero ' + \
                        'residual mean test are given assuming the residuals ' +\
                        'follow a Normal distribution.'
            if disp:
                logging.warn(msg[2])
                # ot.Log.Warn(msg[2])
                # ot.Log.Flush()
        # return msg for the test with pytest and the method getResult()
        return msg

    def saveResults(self, name):
        """
        Save all analysis test results in a file.

        Parameters
        ----------
        name : string
            Name of the file or full path name.

        Notes
        -----
        The file can be saved as a csv file. Separations are made with tabulations.

        If *name* is the file name, then it is saved in the current working
        directory.
        """
        regressionResult = '\n'.join(['{}\t{}\t{}'.format(*line) for
                                line in self._dataRegression])

        residualsResult = '\n'.join(['{}\t{}\t{}'.format(*line) for
                                line in self._dataResiduals])

        with open(name, 'w') as fd:
            fd.write('Linear model analysis results\n\n')
            fd.write(regressionResult)
            fd.write('\n\nResiduals analysis results\n\n')
            fd.write(residualsResult)
            # add warnings if not empty
            if self._printWarnings(False).count('') != len(self._printWarnings(False)):
                fd.write('\n\n')
                fd.write('Warning : ' + '\nWarning : '.join(['{}'.format(line) for 
                                line in self._printWarnings(False) if len(line)>0]))

    def _buildPrintResults(self):
        # Build the lists used in the printResult and saveResults methods :
        # self._dataRegression
        # self._dataResiduals

        # number of digits to be displayed
        n_digits = 2
        #format for confidence interval
        strformat =  "[{:0."+str(n_digits)+"f}, {:0."+str(n_digits)+"f}]"

        if self._boxCox:
            boxCoxstr = round(self._lambdaBoxCox, n_digits)
        else:
            boxCoxstr = "Not enabled"

        testResults = self._resultsUnc.testResults

        # create lists containing all results
        self._dataRegression = [
            ["Box Cox parameter :", boxCoxstr, ""],
            ["", "", ""],
            ["", "Uncensored", ""],
            ["", "", ""],
            ["Intercept coefficient :", round(self._resultsUnc.intercept, n_digits), ""],
            ["Slope coefficient :", round(self._resultsUnc.slope, n_digits), ""],
            ["Standard error of the estimate :", round(self._resultsUnc.stderr, n_digits), ""],
            ["", "", ""],
            ["Confidence interval on coefficients", "", ""],
            ["Intercept coefficient :", strformat.format(*self._resultsUnc.confInt[0]), ""],
            ["Slope coefficient :", strformat.format(*self._resultsUnc.confInt[1]), ""],
            ["Level :", 0.95, ""],
            ["", "", ""],
            ["Quality of regression", "", ""],
            ["R2 (> 0.8):", round(self._resultsUnc.testResults['R2'], n_digits), ""]]

        self._dataResiduals = [
            ["Fitted distribution (uncensored) :", self._resultsUnc.resDist.__str__(), ""],
            ["", "", ""],
            ["", "Uncensored", ""],
            ["Distribution fitting test", "", ""],
            ["Kolmogorov p-value (> 0.05):", round(testResults['Kolmogorov'], n_digits), ""],
            ["", "", ""],
            ["Normality test", "", ""],
            ["Anderson Darling p-value (> 0.05):", round(testResults['AndersonDarling'], n_digits), ""],
            ["Cramer Von Mises p-value (> 0.05):", round(testResults['CramerVonMises'], n_digits), ""],
            ["", "", ""],
            ["Zero residual mean test", "", ""],
            ["p-value (> 0.05):", round(testResults['ZeroMean'], n_digits), ""],
            ["", "", ""],
            ["Homoskedasticity test (constant variance)", "", ""],
            ["Breush Pagan p-value (> 0.05):", round(testResults['BreuschPagan'], n_digits), ""],
            ["Harrison McCabe p-value (> 0.05):", round(testResults['HarrisonMcCabe'], n_digits), ""],
            ["", "", ""],
            ["Non autocorrelation test", "", ""],
            ["Durbin Watson p-value (> 0.05):", round(testResults['DurbinWatson'], n_digits), ""]]

        if self._censored:
            # Add censored case results in the lists
            testResults = self._resultsCens.testResults

            self._dataRegression[2][2] = "Censored"
            self._dataRegression[4][2] = round(self._resultsCens.intercept, n_digits)
            self._dataRegression[5][2] = round(self._resultsCens.slope, n_digits)
            self._dataRegression[6][2] = round(self._resultsCens.stderr, n_digits)
            self._dataRegression[14][2] = round(self._resultsCens.testResults['R2'], n_digits)

            self._dataResiduals.insert(1, ["Fitted distribution (censored) :",
                                       self._resultsCens.resDist.__str__(), ""])
            self._dataResiduals[3][2] = "Censored"
            self._dataResiduals[5][2] = round(testResults['Kolmogorov'], n_digits)
            self._dataResiduals[8][2] = round(testResults['AndersonDarling'], n_digits)
            self._dataResiduals[9][2] = round(testResults['CramerVonMises'], n_digits)
            self._dataResiduals[12][2] = round(testResults['ZeroMean'], n_digits)
            self._dataResiduals[15][2] = round(testResults['BreuschPagan'], n_digits)
            self._dataResiduals[16][2] = round(testResults['HarrisonMcCabe'], n_digits)
            self._dataResiduals[19][2] = round(testResults['DurbinWatson'], n_digits)


################################################################################
############################### graphs #########################################
################################################################################

    def drawLinearModel(self, model="uncensored", name=None):
        """
        Draw the linear regression prediction versus the true data.

        Parameters
        ----------
        model : string
            The linear regression model to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it 
        if model == "censored" and not self._censored:
            raise NameError('Linear model for censored data is not available.')

        defects = self._algoLinear.model.exog[:, 1]
        signals = self._algoLinear.model.endog
        if model == "uncensored":
            # get the fitted values from the linear model of statsmodels
            fittedSignals = self._algoLinear.fittedvalues
        elif model == "censored":
            fittedSignals = self._resultsCens.fittedSignals
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(defects, signals, 'b.', label='Data', ms=9)
        ax.plot(defects, fittedSignals, 'r-', label='Linear model')
        ax.set_xlabel('Defects')
        if model == "uncensored":
            ax.set_ylabel('Signals')
            ax.set_title('Linear regression model')
        elif model == "censored":
            ax.set_ylabel('Box Cox (signals)')
            ax.set_title('Linear regression model for censored data')
        ax.grid()
        ax.legend(loc='upper left')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax


    def drawResiduals(self, model="uncensored", name=None):
        """
        Draw the residuals versus the defect values.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it 
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        defects = self._algoLinear.model.exog[:, 1]
        if model == "uncensored":
            residuals = self._resultsUnc.residuals
        elif model =="censored":
            residuals = self._resultsCens.residuals
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.grid()
        ax.plot(defects, residuals, 'b.', ms=9)
        ax.hlines(0, defects.min(), defects.max(), 'r', 'dashed')
        ax.set_xlabel('Defects')
        ax.set_ylabel('Residuals dispersion')
        if model == "uncensored":
            ax.set_title('Residuals')
        elif model == "censored":
            ax.set_title('Residuals for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawResidualsQQplot(self, model="uncensored", name=None):
        """
        Draw the residuals QQ plot with the fitted distribution.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it 
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        if model == "uncensored":
            residuals = self._resultsUnc.residuals
            distribution = self._resultsUnc.resDist
        elif model == "censored":
            residuals = self._resultsCens.residuals
            distribution = self._resultsCens.resDist
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 8))
        graph = ot.VisualTest.DrawQQplot(residuals, distribution)
        drawables = graph.getDrawables()
        drawables[1].setPointStyle('dot')
        drawables[1].setLineWidth(3)
        drawables[1].setColor('blue')
        graph = ot.Graph()
        graph.add(drawables)

        graph.setXTitle('Residuals empirical quantiles')
        graph.setYTitle(distribution.__str__())
        graph.setGrid(True)
        View(graph, axes=[ax])
        if model == "uncensored":
            ax.set_title('QQ-plot of the residuals ')
        elif model == "censored":
            ax.set_title('QQ-plot of the residuals for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax


    def drawResidualsDistribution(self, model="uncensored", name=None):
        """
        Draw the residuals histogram with the fitted distribution.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it 
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        if model == "uncensored":
            residuals = self._resultsUnc.residuals
            distribution = self._resultsUnc.resDist
        elif model =="censored":
            residuals = self._resultsCens.residuals
            distribution = self._resultsCens.resDist
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        graphHist = ot.VisualTest.DrawHistogram(residuals)
        graphPDF = distribution.drawPDF()
        graphHist.setGrid(True)
        View(graphHist, axes=[ax], bar_kwargs={'color':'blue','alpha': 0.5, 'label':'Residuals histogram'})
        View(graphPDF, axes=[ax], plot_kwargs={'label':distribution.__str__()})
        ax.set_xlabel('Defect realizations')
        if model == "uncensored":
            ax.set_title('Residuals distribution')
        elif model == "censored":
            ax.set_title('Residuals distribution for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax


    def drawBoxCoxLikelihood(self, name=None):
        """
        Draw the loglikelihood versus the Box Cox parameter.

        Parameters
        ----------
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.

        Notes
        -----
        This method is available only when the parameter *boxCox* is set to True.
        """

        # Check is the censored model exists when asking for it 
        if not self._boxCox:
            raise Exception('The Box Cox transformation is not enabled.')

        fig, ax = plt.subplots(figsize=(8, 6))
        # get the graph from the method 'computeBoxCox'
        View(self._graphBoxCox, axes=[ax])
        ax.set_xlabel('Box Cox parameter')
        ax.set_ylabel('LogLikelihood')
        ax.set_title('Loglikelihood versus Box Cox parameter')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax


################################################################################
###################### get methods #############################################
################################################################################

    def getInputSample(self):
        """
        Accessor to the input sample. 

        Returns
        -------
        defects : :py:class:`openturns.NumericalSample`
            The input sample which is the defect values.
        """
        return self._inputSample

    def getOutputSample(self):
        """
        Accessor to the output sample. 

        Returns
        -------
        signals : :py:class:`openturns.NumericalSample`
            The input sample which is the signal values.
        """
        return self._outputSample

    def getNoiseThreshold(self):
        """
        Accessor to the noise threshold. 

        Returns
        -------
        noiseThres : float
            The noise threhold if it exists, if not it returns *None*.
        """
        return self._noiseThres

    def getSaturationThreshold(self):
        """
        Accessor to the saturation threshold. 

        Returns
        -------
        saturationThres : float
            The saturation threhold if it exists, if not it returns *None*.
        """
        return self._saturationThres

    def getResiduals(self):
        """
        Accessor to the residuals. 

        Returns
        -------
        residuals : :py:class:`openturns.NumericalSample`
            The residuals computed from the uncensored and censored linear
            regression model. The first column corresponds with the uncensored case.
        """
        size = self._resultsUnc.residuals.getSize()
        if self._censored:
            residuals = ot.NumericalSample(size, 2)
            residuals[:, 0] = self._resultsUnc.residuals
            residuals[:, 1] = self._resultsCens.residuals
            residuals.setDescription(['Residuals for uncensored case',
                                      'Residuals for censored case'])
        else:
            residuals = self._resultsUnc.residuals
            residuals.setDescription(['Residuals for uncensored case'])

        return residuals

    def getResidualsDistribution(self):
        """
        Accessor to the residuals distribution. 

        Returns
        -------
        distribution : list of :py:class:`openturns.Distribution`
            The fitted distribution on the residuals, computed in the uncensored
            and censored (if so) case.
        """
        distribution = [self._resultsUnc.resDist]
        if self._censored:
            distribution.append(self._resultsCens.resDist)
        return distribution

    def getIntercept(self):
        """
        Accessor to the intercept of the linear regression model. 

        Returns
        -------
        intercept : :py:class:`openturns.NumericalPoint`
            The intercept parameter for the uncensored and censored (if so) linear
            regression model.
        """
        if self._censored:
            intercept = ot.NumericalPointWithDescription(
                        [('Intercept for uncensored case', 
                        self._resultsUnc.intercept),
                        ('Intercept for censored case',
                        self._resultsCens.intercept)])
        else:
            intercept = ot.NumericalPointWithDescription(
                        [('Intercept for uncensored case', 
                        self._resultsUnc.intercept)])

        return intercept

    def getSlope(self):
        """
        Accessor to the slope of the linear regression model. 

        Returns
        -------
        slope : :py:class:`openturns.NumericalPoint`
            The slope parameter for the uncensored and censored (if so) linear
            regression model.
        """
        if self._censored:
            slope = ot.NumericalPointWithDescription(
                        [('Slope for uncensored case', 
                        self._resultsUnc.slope),
                        ('Slope for censored case',
                        self._resultsCens.slope)])
        else:
            slope = ot.NumericalPointWithDescription(
                        [('Slope for uncensored case', 
                        self._resultsUnc.slope)])

        return slope

    def getStandardError(self):
        """
        Accessor to the standard error of the estimate. 

        Returns
        -------
        stderr : :py:class:`openturns.NumericalPoint`
            The standard error of the estimate for the uncensored and censored
            (if so) linear regression model.
        """
        if self._censored:
            stderr = ot.NumericalPointWithDescription(
                        [('Stderr for uncensored case', 
                        self._resultsUnc.stderr),
                        ('Stderr for censored case',
                        self._resultsCens.stderr)])
        else:
            stderr = ot.NumericalPointWithDescription(
                        [('Stderr for uncensored case', 
                        self._resultsUnc.stderr)])

        return stderr

    def getBoxCoxParameter(self):
        """
        Accessor to the Box Cox parameter. 

        Returns
        -------
        lambdaBoxCox : float
            The Box Cox parameter used to transform the data. If the transformation
            is not enabled None is returned. 
        """
        return self._lambdaBoxCox

    def getR2(self):
        """
        Accessor to the R2 value. 

        Returns
        -------
        R2 : :py:class:`openturns.NumericalPoint`
            Either the R2 for the uncensored case or for both cases.
        """
        return self._getResultValue('R2', 'R2')

    def getAndersonDarlingPValue(self):
        """
        Accessor to the Anderson Darling test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('AndersonDarling', 'Anderson Darling p-value')


    def getCramerVonMisesPValue(self):
        """
        Accessor to the Cramer Von Mises test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('CramerVonMises', 'Cramer Von Mises p-value')

    def getKolmogorovPValue(self):
        """
        Accessor to the Kolmogorov test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('Kolmogorov', 'Kolmogorov p-value')

    def getZeroMeanPValue(self):
        """
        Accessor to the Zero Mean test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('ZeroMean', 'Zero Mean p-value')

    def getBreuschPaganPValue(self):
        """
        Accessor to the Breusch Pagan test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('BreuschPagan', 'Breusch Pagan p-value')

    def getHarrisonMcCabePValue(self):
        """
        Accessor to the Harrison McCabe test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('HarrisonMcCabe', 'Harrison McCabe p-value')

    def getDurbinWatsonPValue(self):
        """
        Accessor to the Durbin Watson test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('DurbinWatson', 'Durbin Watson p-value')


    def _getResultValue(self, test, description):
        """
        Generalized accessor method for the R2 or p-values.
        Parameters
        ----------
        test : string
            name of the keys for the dictionnary.
        description : string
            name the test to be displayed.
        """
        if self._censored:
            pValue = ot.NumericalPointWithDescription(
                        [(description + ' for uncensored case', 
                        self._resultsUnc.testResults[test]),
                        (description + ' for censored case',
                        self._resultsCens.testResults[test])])
        else:
            pValue = ot.NumericalPointWithDescription(
                        [(description + ' for uncensored case', 
                        self._resultsUnc.testResults[test])])
        return pValue
Пример #7
0
class UnivariateLinearModelAnalysis():
    """
    Linear regression analysis with residuals hypothesis tests.
    
    **Available constructors:**

    UnivariateLinearModelAnalysis(*inputSample, outputSample*)

    UnivariateLinearModelAnalysis(*inputSample, outputSample, noiseThres,
    saturationThres, resDistFact, boxCox*)

    Parameters
    ----------
    inputSample : 2-d sequence of float
        Vector of the defect sizes, of dimension 1.
    outputSample : 2-d sequence of float
        Vector of the signals, of dimension 1.
    noiseThres : float
        Value for low censored data. Default is None.
    saturationThres : float
        Value for high censored data. Default is None.
    resDistFact : :py:class:`openturns.DistributionFactory`
        Distribution hypothesis followed by the residuals. Default is 
        :py:class:`openturns.NormalFactory`.
    boxCox : bool or float
        Enable or not the Box Cox transformation. If boxCox is a float, the Box
        Cox transformation is enabled with the given value. Default is False.

    Notes
    -----
    This method automatically :

    - computes the Box Cox parameter if *boxCox* is True,
    - computes the transformed signals if *boxCox* is True or a float,
    - builds the univariate linear regression model on the data,
    - computes the linear regression parameters for censored data if needed,
    - computes the residuals,
    - runs all hypothesis tests.

    Examples
    --------
    Generate data :

    >>> import openturns as ot
    >>> import otpod
    >>> N = 100
    >>> ot.RandomGenerator.SetSeed(0)
    >>> defectDist = ot.Uniform(0.1, 0.6)
    >>> epsilon = ot.Normal(0, 1.9)
    >>> defects = defectDist.getSample(N)
    >>> signalsInvBoxCox = defects * 43. + epsilon.getSample(N) + 2.5
    >>> invBoxCox = ot.InverseBoxCoxTransform(0.3)
    >>> signals = invBoxCox(signalsInvBoxCox)
    
    Run analysis with gaussian hypothesis on the residuals :

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, boxCox=True)
    >>> print analysis.getIntercept() # get intercept value
    [Intercept for uncensored case : 2.51037]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.835529]
    
    Run analysis with noise and saturation threshold :

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, 60., 1700., boxCox=True)
    >>> print analysis.getIntercept() # get intercept value for uncensored and censored case
    [Intercept for uncensored case : 4.28758, Intercept for censored case : 3.11243]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.346827, Kolmogorov p-value for censored case : 0.885006]

    Run analysis with a Weibull distribution hypothesis on the residuals

    >>> analysis = otpod.UnivariateLinearModelAnalysis(defects, signals, 60., 1700., ot.WeibullFactory(), boxCox=True)
    >>> print analysis.getIntercept() # get intercept value for uncensored and censored case
    [Intercept for uncensored case : 4.28758, Intercept for censored case : 3.11243]
    >>> print analysis.getKolmogorovPValue()
    [Kolmogorov p-value for uncensored case : 0.476036, Kolmogorov p-value for censored case : 0.71764]
    """
    def __init__(self,
                 inputSample,
                 outputSample,
                 noiseThres=None,
                 saturationThres=None,
                 resDistFact=None,
                 boxCox=False):

        self._inputSample = ot.NumericalSample(np.vstack(inputSample))
        self._outputSample = ot.NumericalSample(np.vstack(outputSample))
        self._noiseThres = noiseThres
        self._saturationThres = saturationThres
        # Add flag to tell if censored data must taken into account or not.
        if noiseThres is not None or saturationThres is not None:
            # flag to tell censoring is enabled
            self._censored = True
            # Results instances are created for both cases.
            self._resultsCens = _Results()
            self._resultsUnc = _Results()
        else:
            self._censored = False
            # Results instance is created only for uncensored case.
            self._resultsUnc = _Results()

        if resDistFact is None:
            # default is NormalFactory
            self._resDistFact = ot.NormalFactory()
        else:
            self._resDistFact = resDistFact

        # if Box Cox is a float the transformation is enabled with the given value
        if type(boxCox) is float:
            self._lambdaBoxCox = boxCox
            self._boxCox = True
        else:
            self._lambdaBoxCox = None
            self._boxCox = boxCox

        self._size = self._inputSample.getSize()
        self._dim = self._inputSample.getDimension()

        # Assertions on parameters
        assert (self._size >= 3), "Not enough observations."
        assert (self._size == self._outputSample.getSize()), \
                "InputSample and outputSample must have the same size."
        assert (self._dim == 1), "Dimension of inputSample must be 1."
        assert (self._outputSample.getDimension() == 1
                ), "Dimension of outputSample must be 1."

        # run the analysis
        self._run()
        # print warnings
        self._printWarnings()

    def _run(self):
        """
        Run the analysis :
        - Computes the Box Cox parameter if *boxCox* is True,
        - Computes the transformed signals if *boxCox* is True or a float,
        - Builds the univariate linear regression model on the data,
        - Computes the linear regression parameters for censored data if needed,
        - Computes the residuals,
        - Runs all hypothesis tests.
        """

        #################### Filter censored data ##############################
        if self._censored:
            # Filter censored data
            # Returns:
            # defects in the non censored area
            # defectsNoise in the noisy area
            # defectsSat in the saturation area
            # signals in the non censored area
            # check if one the threshold is None
            defects, defectsNoise, defectsSat, signals = \
                DataHandling.filterCensoredData(self._inputSample, self._outputSample,
                              self._noiseThres, self._saturationThres)
        else:
            defects, signals = self._inputSample, self._outputSample

        defectsSize = defects.getSize()

        ###################### Box Cox transformation ##########################
        # Compute Box Cox if enabled
        if self._boxCox:
            if self._lambdaBoxCox is None:
                # optimization required, get optimal lambda and graph
                self._lambdaBoxCox, self._graphBoxCox = computeBoxCox(
                    defects, signals)

            # Transformation of data
            boxCoxTransform = ot.BoxCoxTransform([self._lambdaBoxCox])
            signals = boxCoxTransform(signals)
            if self._noiseThres is not None:
                noiseThres = boxCoxTransform([self._noiseThres])[0]
            else:
                noiseThres = self._noiseThres
            if self._saturationThres is not None:
                saturationThres = boxCoxTransform([self._saturationThres])[0]
            else:
                saturationThres = self._saturationThres
        else:
            noiseThres = self._noiseThres
            saturationThres = self._saturationThres

        ######################### Linear Regression model ######################
        # Linear regression with statsmodels module
        # Create the X matrix : [1, inputSample]
        X = ot.NumericalSample(defectsSize, [1, 0])
        X[:, 1] = defects
        self._algoLinear = OLS(np.array(signals), np.array(X)).fit()

        self._resultsUnc.intercept = self._algoLinear.params[0]
        self._resultsUnc.slope = self._algoLinear.params[1]
        # get standard error estimates (residuals standard deviation)
        self._resultsUnc.stderr = np.sqrt(self._algoLinear.scale)
        # get confidence interval at level 95%
        self._resultsUnc.confInt = self._algoLinear.conf_int(0.05)

        if self._censored:
            # define initial starting point for MLE optimization
            initialStartMLE = [
                self._resultsUnc.intercept, self._resultsUnc.slope,
                self._resultsUnc.stderr
            ]
            # MLE optimization
            res = computeLinearParametersCensored(initialStartMLE, defects,
                                                  defectsNoise, defectsSat,
                                                  signals, noiseThres,
                                                  saturationThres)
            self._resultsCens.intercept = res[0]
            self._resultsCens.slope = res[1]
            self._resultsCens.stderr = res[2]

        ############################ Residuals #################################
        # get residuals from algoLinear
        self._resultsUnc.residuals = ot.NumericalSample(
            np.vstack(self._algoLinear.resid))
        # compute residuals distribution
        self._resultsUnc.resDist = self._resDistFact.build(
            self._resultsUnc.residuals)

        if self._censored:
            # create linear model function for censored case
            def CensLinModel(x):
                return self._resultsCens.intercept + self._resultsCens.slope * x

            # compute the residuals for the censored case.
            self._resultsCens.fittedSignals = CensLinModel(defects)
            self._resultsCens.residuals = signals - self._resultsCens.fittedSignals
            # compute residuals distribution.
            self._resultsCens.resDist = self._resDistFact.build(
                self._resultsCens.residuals)

        ########################## Compute tests ###############################
        self._resultsUnc.testResults = \
                self._computeTests(defects, signals, self._resultsUnc.residuals,
                                   self._resultsUnc.resDist)

        if self._censored:
            self._resultsCens.testResults = \
                self._computeTests(defects, signals, self._resultsCens.residuals,
                                   self._resultsCens.resDist)

        ################ Build the result lists to be printed ##################
        self._buildPrintResults()

################################################################################
###################### Hypothesis and validation tests #########################
################################################################################

    def _computeTests(self, defects, signals, residuals, resDist):

        testResults = {}
        # compute R2
        testResults['R2'] = computeR2(signals, residuals)

        # compute Anderson Darling test (normality test)
        testAnderDar = ot.NormalityTest.AndersonDarlingNormal(residuals)
        testResults['AndersonDarling'] = testAnderDar.getPValue()

        # compute Cramer Von Mises test (normality test)
        testCramVM = ot.NormalityTest.CramerVonMisesNormal(residuals)
        testResults['CramerVonMises'] = testCramVM.getPValue()

        # compute zero residual mean test
        testResults['ZeroMean'] = computeZeroMeanTest(residuals)

        # compute Kolmogorov test (fitting test)
        if LooseVersion(ot.__version__) == '1.6':
            testKol = ot.FittingTest.Kolmogorov(residuals, resDist, 0.95,
                                                resDist.getParametersNumber())
        elif LooseVersion(ot.__version__) > '1.6':
            testKol = ot.FittingTest.Kolmogorov(
                residuals, resDist, 0.95, resDist.getParameterDimension())

        testResults['Kolmogorov'] = testKol.getPValue()

        # compute Breusch Pagan test (homoskedasticity : constant variance)
        testResults['BreuschPagan'] = computeBreuschPaganTest(
            defects, residuals)

        # compute Harrison McCabe test (homoskedasticity : constant variance)
        testResults['HarrisonMcCabe'] = computeHarrisonMcCabeTest(residuals)

        # compute Durbin Watson test (autocorrelation == 0)
        testResults['DurbinWatson'] = computeDurbinWatsonTest(
            defects, residuals)

        return testResults

################################################################################
########################## Print and save results ##############################
################################################################################

    def getResults(self):
        """
        Print results of the linear analysis.
        """
        # Enable warning to be displayed
        ot.Log.Show(ot.Log.WARN)

        regressionResult = '\n'.join([
            '{:<47} {:>13} {:>13}'.format(*line)
            for line in self._dataRegression
        ])

        residualsResult = '\n'.join([
            '{:<47} {:>13} {:>13}'.format(*line)
            for line in self._dataResiduals
        ])

        ndash = 80
        results = '-' * ndash + '\n'
        results = results + '         Linear model analysis results' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + regressionResult + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '         Residuals analysis results' + '\n'
        results = results + '-' * ndash + '\n'
        results = results + residualsResult + '\n'
        results = results + '-' * ndash + '\n'
        results = results + '' + '\n'
        # print warnings if not empty
        if self._printWarnings(False).count('') != len(
                self._printWarnings(False)):
            results = results + 'Warning : ' + '\nWarning : '.join([
                '{}'.format(line)
                for line in self._printWarnings(False) if len(line) > 0
            ])
        return results

    def _printWarnings(self, disp=True):
        # Check results and display warnings

        valuesUnc = np.array(list(self._resultsUnc.testResults.values()))
        if self._censored:
            valuesCens = np.array(list(self._resultsCens.testResults.values()))
            testPValues = ((valuesUnc < 0.05).any()
                           or (valuesCens < 0.05).any())
        else:
            testPValues = (valuesUnc < 0.05).any()

        # print warning if some pValues are less than 0.05
        msg = ["", "", ""]
        if testPValues and not self._boxCox:
            msg[0] = 'Some hypothesis tests failed : you may consider to use '+\
                        'the Box Cox transformation.'
            if disp:
                logging.warn(msg[0])
                # ot.Log.Warn(msg[0])
                # ot.Log.Flush()
        elif testPValues and self._boxCox:
            msg[1] = 'Some hypothesis tests failed : you may consider to use '+\
                'quantile regression or kriging (if input dimension > 1) to build POD.'
            if disp:
                logging.warn(msg[1])
                # ot.Log.Warn(msg[1])
                # ot.Log.Flush()

        if self._resultsUnc.resDist.getClassName() != 'Normal':
            msg[2] = 'Confidence interval, Normality tests and zero ' + \
                        'residual mean test are given assuming the residuals ' +\
                        'follow a Normal distribution.'
            if disp:
                logging.warn(msg[2])
                # ot.Log.Warn(msg[2])
                # ot.Log.Flush()
        # return msg for the test with pytest and the method getResult()
        return msg

    def saveResults(self, name):
        """
        Save all analysis test results in a file.

        Parameters
        ----------
        name : string
            Name of the file or full path name.

        Notes
        -----
        The file can be saved as a csv file. Separations are made with tabulations.

        If *name* is the file name, then it is saved in the current working
        directory.
        """
        regressionResult = '\n'.join(
            ['{}\t{}\t{}'.format(*line) for line in self._dataRegression])

        residualsResult = '\n'.join(
            ['{}\t{}\t{}'.format(*line) for line in self._dataResiduals])

        with open(name, 'w') as fd:
            fd.write('Linear model analysis results\n\n')
            fd.write(regressionResult)
            fd.write('\n\nResiduals analysis results\n\n')
            fd.write(residualsResult)
            # add warnings if not empty
            if self._printWarnings(False).count('') != len(
                    self._printWarnings(False)):
                fd.write('\n\n')
                fd.write('Warning : ' + '\nWarning : '.join([
                    '{}'.format(line)
                    for line in self._printWarnings(False) if len(line) > 0
                ]))

    def _buildPrintResults(self):
        # Build the lists used in the printResult and saveResults methods :
        # self._dataRegression
        # self._dataResiduals

        # number of digits to be displayed
        n_digits = 2
        #format for confidence interval
        strformat = "[{:0." + str(n_digits) + "f}, {:0." + str(
            n_digits) + "f}]"

        if self._boxCox:
            boxCoxstr = round(self._lambdaBoxCox, n_digits)
        else:
            boxCoxstr = "Not enabled"

        testResults = self._resultsUnc.testResults

        # create lists containing all results
        self._dataRegression = [
            ["Box Cox parameter :", boxCoxstr, ""], ["", "", ""],
            ["", "Uncensored", ""], ["", "", ""],
            [
                "Intercept coefficient :",
                round(self._resultsUnc.intercept, n_digits), ""
            ],
            [
                "Slope coefficient :",
                round(self._resultsUnc.slope, n_digits), ""
            ],
            [
                "Standard error of the estimate :",
                round(self._resultsUnc.stderr, n_digits), ""
            ], ["", "", ""], ["Confidence interval on coefficients", "", ""],
            [
                "Intercept coefficient :",
                strformat.format(*self._resultsUnc.confInt[0]), ""
            ],
            [
                "Slope coefficient :",
                strformat.format(*self._resultsUnc.confInt[1]), ""
            ], ["Level :", 0.95, ""], ["", "", ""],
            ["Quality of regression", "", ""],
            [
                "R2 (> 0.8):",
                round(self._resultsUnc.testResults['R2'], n_digits), ""
            ]
        ]

        self._dataResiduals = [
            [
                "Fitted distribution (uncensored) :",
                self._resultsUnc.resDist.__str__(), ""
            ], ["", "", ""], ["", "Uncensored", ""],
            ["Distribution fitting test", "", ""],
            [
                "Kolmogorov p-value (> 0.05):",
                round(testResults['Kolmogorov'], n_digits), ""
            ], ["", "", ""], ["Normality test", "", ""],
            [
                "Anderson Darling p-value (> 0.05):",
                round(testResults['AndersonDarling'], n_digits), ""
            ],
            [
                "Cramer Von Mises p-value (> 0.05):",
                round(testResults['CramerVonMises'], n_digits), ""
            ], ["", "", ""], ["Zero residual mean test", "", ""],
            [
                "p-value (> 0.05):",
                round(testResults['ZeroMean'], n_digits), ""
            ], ["", "", ""],
            ["Homoskedasticity test (constant variance)", "", ""],
            [
                "Breush Pagan p-value (> 0.05):",
                round(testResults['BreuschPagan'], n_digits), ""
            ],
            [
                "Harrison McCabe p-value (> 0.05):",
                round(testResults['HarrisonMcCabe'], n_digits), ""
            ], ["", "", ""], ["Non autocorrelation test", "", ""],
            [
                "Durbin Watson p-value (> 0.05):",
                round(testResults['DurbinWatson'], n_digits), ""
            ]
        ]

        if self._censored:
            # Add censored case results in the lists
            testResults = self._resultsCens.testResults

            self._dataRegression[2][2] = "Censored"
            self._dataRegression[4][2] = round(self._resultsCens.intercept,
                                               n_digits)
            self._dataRegression[5][2] = round(self._resultsCens.slope,
                                               n_digits)
            self._dataRegression[6][2] = round(self._resultsCens.stderr,
                                               n_digits)
            self._dataRegression[14][2] = round(
                self._resultsCens.testResults['R2'], n_digits)

            self._dataResiduals.insert(1, [
                "Fitted distribution (censored) :",
                self._resultsCens.resDist.__str__(), ""
            ])
            self._dataResiduals[3][2] = "Censored"
            self._dataResiduals[5][2] = round(testResults['Kolmogorov'],
                                              n_digits)
            self._dataResiduals[8][2] = round(testResults['AndersonDarling'],
                                              n_digits)
            self._dataResiduals[9][2] = round(testResults['CramerVonMises'],
                                              n_digits)
            self._dataResiduals[12][2] = round(testResults['ZeroMean'],
                                               n_digits)
            self._dataResiduals[15][2] = round(testResults['BreuschPagan'],
                                               n_digits)
            self._dataResiduals[16][2] = round(testResults['HarrisonMcCabe'],
                                               n_digits)
            self._dataResiduals[19][2] = round(testResults['DurbinWatson'],
                                               n_digits)

################################################################################
############################### graphs #########################################
################################################################################

    def drawLinearModel(self, model="uncensored", name=None):
        """
        Draw the linear regression prediction versus the true data.

        Parameters
        ----------
        model : string
            The linear regression model to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it
        if model == "censored" and not self._censored:
            raise NameError('Linear model for censored data is not available.')

        defects = self._algoLinear.model.exog[:, 1]
        signals = self._algoLinear.model.endog
        if model == "uncensored":
            # get the fitted values from the linear model of statsmodels
            fittedSignals = self._algoLinear.fittedvalues
        elif model == "censored":
            fittedSignals = self._resultsCens.fittedSignals
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.plot(defects, signals, 'b.', label='Data', ms=9)
        ax.plot(defects, fittedSignals, 'r-', label='Linear model')
        ax.set_xlabel('Defects')
        if model == "uncensored":
            ax.set_ylabel('Signals')
            ax.set_title('Linear regression model')
        elif model == "censored":
            ax.set_ylabel('Box Cox (signals)')
            ax.set_title('Linear regression model for censored data')
        ax.grid()
        ax.legend(loc='upper left')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawResiduals(self, model="uncensored", name=None):
        """
        Draw the residuals versus the defect values.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        defects = self._algoLinear.model.exog[:, 1]
        if model == "uncensored":
            residuals = self._resultsUnc.residuals
        elif model == "censored":
            residuals = self._resultsCens.residuals
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        ax.grid()
        ax.plot(defects, residuals, 'b.', ms=9)
        ax.hlines(0, defects.min(), defects.max(), 'r', 'dashed')
        ax.set_xlabel('Defects')
        ax.set_ylabel('Residuals dispersion')
        if model == "uncensored":
            ax.set_title('Residuals')
        elif model == "censored":
            ax.set_title('Residuals for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawResidualsQQplot(self, model="uncensored", name=None):
        """
        Draw the residuals QQ plot with the fitted distribution.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        if model == "uncensored":
            residuals = self._resultsUnc.residuals
            distribution = self._resultsUnc.resDist
        elif model == "censored":
            residuals = self._resultsCens.residuals
            distribution = self._resultsCens.resDist
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 8))
        graph = ot.VisualTest.DrawQQplot(residuals, distribution)
        drawables = graph.getDrawables()
        drawables[1].setPointStyle('dot')
        drawables[1].setLineWidth(3)
        drawables[1].setColor('blue')
        graph = ot.Graph()
        graph.add(drawables)

        graph.setXTitle('Residuals empirical quantiles')
        graph.setYTitle(distribution.__str__())
        graph.setGrid(True)
        View(graph, axes=[ax])
        if model == "uncensored":
            ax.set_title('QQ-plot of the residuals ')
        elif model == "censored":
            ax.set_title('QQ-plot of the residuals for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawResidualsDistribution(self, model="uncensored", name=None):
        """
        Draw the residuals histogram with the fitted distribution.

        Parameters
        ----------
        model : string
            The residuals to be used, either *uncensored* or
            *censored* if censored threshold were given. Default is *uncensored*.
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.
        """

        # Check is the censored model exists when asking for it
        if model == "censored" and not self._censored:
            raise NameError('Residuals for censored data is not available.')

        if model == "uncensored":
            residuals = self._resultsUnc.residuals
            distribution = self._resultsUnc.resDist
        elif model == "censored":
            residuals = self._resultsCens.residuals
            distribution = self._resultsCens.resDist
        else:
            raise NameError("model can be 'uncensored' or 'censored'.")

        fig, ax = plt.subplots(figsize=(8, 6))
        graphHist = ot.VisualTest.DrawHistogram(residuals)
        graphPDF = distribution.drawPDF()
        graphHist.setGrid(True)
        View(graphHist,
             axes=[ax],
             bar_kwargs={
                 'color': 'blue',
                 'alpha': 0.5,
                 'label': 'Residuals histogram'
             })
        View(graphPDF,
             axes=[ax],
             plot_kwargs={'label': distribution.__str__()})
        ax.set_xlabel('Defect realizations')
        if model == "uncensored":
            ax.set_title('Residuals distribution')
        elif model == "censored":
            ax.set_title('Residuals distribution for censored data')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

    def drawBoxCoxLikelihood(self, name=None):
        """
        Draw the loglikelihood versus the Box Cox parameter.

        Parameters
        ----------
        name : string
            name of the figure to be saved with *transparent* option sets to True
            and *bbox_inches='tight'*. It can be only the file name or the 
            full path name. Default is None.

        Returns
        -------
        fig : `matplotlib.figure <http://matplotlib.org/api/figure_api.html>`_
            Matplotlib figure object.
        ax : `matplotlib.axes <http://matplotlib.org/api/axes_api.html>`_
            Matplotlib axes object.

        Notes
        -----
        This method is available only when the parameter *boxCox* is set to True.
        """

        # Check is the censored model exists when asking for it
        if not self._boxCox:
            raise Exception('The Box Cox transformation is not enabled.')

        fig, ax = plt.subplots(figsize=(8, 6))
        # get the graph from the method 'computeBoxCox'
        View(self._graphBoxCox, axes=[ax])
        ax.set_xlabel('Box Cox parameter')
        ax.set_ylabel('LogLikelihood')
        ax.set_title('Loglikelihood versus Box Cox parameter')

        if name is not None:
            fig.savefig(name, bbox_inches='tight', transparent=True)

        return fig, ax

################################################################################
###################### get methods #############################################
################################################################################

    def getInputSample(self):
        """
        Accessor to the input sample. 

        Returns
        -------
        defects : :py:class:`openturns.NumericalSample`
            The input sample which is the defect values.
        """
        return self._inputSample

    def getOutputSample(self):
        """
        Accessor to the output sample. 

        Returns
        -------
        signals : :py:class:`openturns.NumericalSample`
            The input sample which is the signal values.
        """
        return self._outputSample

    def getNoiseThreshold(self):
        """
        Accessor to the noise threshold. 

        Returns
        -------
        noiseThres : float
            The noise threhold if it exists, if not it returns *None*.
        """
        return self._noiseThres

    def getSaturationThreshold(self):
        """
        Accessor to the saturation threshold. 

        Returns
        -------
        saturationThres : float
            The saturation threhold if it exists, if not it returns *None*.
        """
        return self._saturationThres

    def getResiduals(self):
        """
        Accessor to the residuals. 

        Returns
        -------
        residuals : :py:class:`openturns.NumericalSample`
            The residuals computed from the uncensored and censored linear
            regression model. The first column corresponds with the uncensored case.
        """
        size = self._resultsUnc.residuals.getSize()
        if self._censored:
            residuals = ot.NumericalSample(size, 2)
            residuals[:, 0] = self._resultsUnc.residuals
            residuals[:, 1] = self._resultsCens.residuals
            residuals.setDescription([
                'Residuals for uncensored case', 'Residuals for censored case'
            ])
        else:
            residuals = self._resultsUnc.residuals
            residuals.setDescription(['Residuals for uncensored case'])

        return residuals

    def getResidualsDistribution(self):
        """
        Accessor to the residuals distribution. 

        Returns
        -------
        distribution : list of :py:class:`openturns.Distribution`
            The fitted distribution on the residuals, computed in the uncensored
            and censored (if so) case.
        """
        distribution = [self._resultsUnc.resDist]
        if self._censored:
            distribution.append(self._resultsCens.resDist)
        return distribution

    def getIntercept(self):
        """
        Accessor to the intercept of the linear regression model. 

        Returns
        -------
        intercept : :py:class:`openturns.NumericalPoint`
            The intercept parameter for the uncensored and censored (if so) linear
            regression model.
        """
        if self._censored:
            intercept = ot.NumericalPointWithDescription([
                ('Intercept for uncensored case', self._resultsUnc.intercept),
                ('Intercept for censored case', self._resultsCens.intercept)
            ])
        else:
            intercept = ot.NumericalPointWithDescription([
                ('Intercept for uncensored case', self._resultsUnc.intercept)
            ])

        return intercept

    def getSlope(self):
        """
        Accessor to the slope of the linear regression model. 

        Returns
        -------
        slope : :py:class:`openturns.NumericalPoint`
            The slope parameter for the uncensored and censored (if so) linear
            regression model.
        """
        if self._censored:
            slope = ot.NumericalPointWithDescription([
                ('Slope for uncensored case', self._resultsUnc.slope),
                ('Slope for censored case', self._resultsCens.slope)
            ])
        else:
            slope = ot.NumericalPointWithDescription([
                ('Slope for uncensored case', self._resultsUnc.slope)
            ])

        return slope

    def getStandardError(self):
        """
        Accessor to the standard error of the estimate. 

        Returns
        -------
        stderr : :py:class:`openturns.NumericalPoint`
            The standard error of the estimate for the uncensored and censored
            (if so) linear regression model.
        """
        if self._censored:
            stderr = ot.NumericalPointWithDescription([
                ('Stderr for uncensored case', self._resultsUnc.stderr),
                ('Stderr for censored case', self._resultsCens.stderr)
            ])
        else:
            stderr = ot.NumericalPointWithDescription([
                ('Stderr for uncensored case', self._resultsUnc.stderr)
            ])

        return stderr

    def getBoxCoxParameter(self):
        """
        Accessor to the Box Cox parameter. 

        Returns
        -------
        lambdaBoxCox : float
            The Box Cox parameter used to transform the data. If the transformation
            is not enabled None is returned. 
        """
        return self._lambdaBoxCox

    def getR2(self):
        """
        Accessor to the R2 value. 

        Returns
        -------
        R2 : :py:class:`openturns.NumericalPoint`
            Either the R2 for the uncensored case or for both cases.
        """
        return self._getResultValue('R2', 'R2')

    def getAndersonDarlingPValue(self):
        """
        Accessor to the Anderson Darling test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('AndersonDarling',
                                    'Anderson Darling p-value')

    def getCramerVonMisesPValue(self):
        """
        Accessor to the Cramer Von Mises test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('CramerVonMises',
                                    'Cramer Von Mises p-value')

    def getKolmogorovPValue(self):
        """
        Accessor to the Kolmogorov test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('Kolmogorov', 'Kolmogorov p-value')

    def getZeroMeanPValue(self):
        """
        Accessor to the Zero Mean test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('ZeroMean', 'Zero Mean p-value')

    def getBreuschPaganPValue(self):
        """
        Accessor to the Breusch Pagan test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('BreuschPagan', 'Breusch Pagan p-value')

    def getHarrisonMcCabePValue(self):
        """
        Accessor to the Harrison McCabe test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('HarrisonMcCabe',
                                    'Harrison McCabe p-value')

    def getDurbinWatsonPValue(self):
        """
        Accessor to the Durbin Watson test p-value.

        Returns
        -------
        pValue : :py:class:`openturns.NumericalPoint`
            Either the p-value for the uncensored case or for both cases.
        """
        return self._getResultValue('DurbinWatson', 'Durbin Watson p-value')

    def _getResultValue(self, test, description):
        """
        Generalized accessor method for the R2 or p-values.
        Parameters
        ----------
        test : string
            name of the keys for the dictionnary.
        description : string
            name the test to be displayed.
        """
        if self._censored:
            pValue = ot.NumericalPointWithDescription([
                (description + ' for uncensored case',
                 self._resultsUnc.testResults[test]),
                (description + ' for censored case',
                 self._resultsCens.testResults[test])
            ])
        else:
            pValue = ot.NumericalPointWithDescription([
                (description + ' for uncensored case',
                 self._resultsUnc.testResults[test])
            ])
        return pValue