示例#1
0
    def __init__(self, path, **kwargs):
        """
        Read failures and right censored data from an Excel file.

        The first column of the sheet is read as the failures and, when a
        second column exists, it is read as the right censored data. NaN
        entries are stripped from each column using removeNaNs. Any columns
        beyond the second are ignored and a warning is printed.

        Inputs:
        path - the file to read. Passed to pandas.read_excel as io.
        kwargs - forwarded unchanged to pandas.read_excel.
        """
        sheet = pd.read_excel(io=path, **kwargs)
        headers = sheet.columns
        n_columns = len(headers)

        self.failures = removeNaNs(sheet[headers[0]].to_numpy())

        if n_columns <= 1:
            # only failures were supplied
            self.right_censored = None
            self.__df = pd.DataFrame({"failures": self.failures}, columns=["failures"])
        else:
            self.right_censored = removeNaNs(sheet[headers[1]].to_numpy())
            # the two columns may differ in length, so the shorter one is
            # padded with empty strings before the dataframe is built
            fail_col = list(self.failures)
            cens_col = list(self.right_censored)
            n_rows = max(len(fail_col), len(cens_col))
            fail_col += [""] * (n_rows - len(fail_col))
            cens_col += [""] * (n_rows - len(cens_col))
            self.__df = pd.DataFrame(
                {"failures": fail_col, "right censored": cens_col},
                columns=["failures", "right censored"],
            )

        if n_columns > 2:
            colorprint(
                "WARNING: xlsx_to_FR assumes the first two columns in the excel file are 'failures' and 'right censored'. All other columns have been ignored",
                text_color="red",
            )
示例#2
0
    def __init__(self, path, censor_code_in_xlsx=None, failure_code_in_xlsx=None, censor_code_in_XCN='C', failure_code_in_XCN='F', **kwargs):
        """
        Read XCN formatted data from an Excel file.

        The first three columns of the sheet are read as X (event times),
        C (censoring codes) and N (number of items at each event time). If
        the third column is absent, N is taken as all ones. Any further
        columns are ignored and a warning is printed.

        Inputs:
        path - the file to read. Passed to pandas.read_excel as io.
        censor_code_in_xlsx - the value used in the sheet to mark censored items. Optional.
        failure_code_in_xlsx - the value used in the sheet to mark failed items. Optional.
        censor_code_in_XCN - the censoring code written to the output. Default is 'C'.
        failure_code_in_XCN - the failure code written to the output. Default is 'F'.
        kwargs - forwarded unchanged to pandas.read_excel.

        Raises ValueError if the second column has more than 2 unique codes,
        contains a code that is not recognised, or if the columns have
        different lengths after NaNs are removed.
        """
        sheet = pd.read_excel(io=path, **kwargs)
        headers = sheet.columns
        n_columns = len(headers)

        # first column: event times
        X = np.array(removeNaNs(list(sheet[headers[0]].to_numpy())))

        # second column: censoring codes. Strings are upper-cased so that matching is not case sensitive
        raw_codes = removeNaNs(sheet[headers[1]].to_numpy())
        codes = [code.upper() if type(code) in [str, np.str_] else code for code in raw_codes]
        distinct_codes = np.unique(codes)
        if len(distinct_codes) > 2:
            raise ValueError(str('xlsx_to_XCN assumes the second column is C (censoring code). A maximum of 2 unique censoring codes are allowed. Within this column there were ' + str(len(distinct_codes)) + ' unique values: ' + str(distinct_codes)))

        # the user specified codes are upper-cased as well because they are compared with the upper-cased column
        if type(failure_code_in_xlsx) in [str, np.str_]:
            failure_code_in_xlsx = failure_code_in_xlsx.upper()
        if type(censor_code_in_xlsx) in [str, np.str_]:
            censor_code_in_xlsx = censor_code_in_xlsx.upper()

        default_failure_codes = ['F', 'FAIL', 'FAILED', 0]
        default_censor_codes = ['R', 'RC', 'RIGHT CENS', 'RIGHT CENSORED', 'C', 'CENSORED', 'CENS', 'S', 'SUSP', 'SUSPENSION', 'SUSPENDED', 'UF', 'UNFAILED', 'UNFAIL', 'NF', 'NO FAIL', 'NO FAILURE', 'NOT FAILED', 1]
        translated = []
        for code in codes:
            # user specified codes take priority over the defaults
            if code == failure_code_in_xlsx:
                translated.append(failure_code_in_XCN)
                continue
            if code == censor_code_in_xlsx:
                translated.append(censor_code_in_XCN)
                continue
            if code in default_failure_codes:
                translated.append(failure_code_in_XCN)
                continue
            if code in default_censor_codes:
                translated.append(censor_code_in_XCN)
                continue
            raise ValueError('Unrecognised value in the second column of the xlsx file. xlsx_to_XCN assumes the second column is C (censoring code). Common values are used as defaults but the xlsx file contained unrecognised values. You can fix this by specifying the arguments censor_code_in_xlsx  and failure_code_in_xlsx.')
        C = np.array(translated)

        # third column: quantities. When it is missing every quantity is 1
        if n_columns > 2:
            N = removeNaNs(sheet[headers[2]].to_numpy())
        else:
            N = np.ones_like(X)
        if n_columns > 3:
            colorprint("WARNING: xlsx_to_XCN assumes the first three columns in the excel file are being used for 'X' (event times), 'C' (censoring codes), 'N' (number of items at each event time). All other columns have been ignored", text_color='red')
        if not (len(X) == len(C) == len(N)):
            raise ValueError('The lengths of the first 3 columns in the xlsx file do not match. This may be because some data is missing.')

        # converting to FR and back again merges any duplicated rows that were not summarised in the input data
        as_FR = XCN_to_FR(X=X, C=C, N=N)
        as_XCN = FR_to_XCN(failures=as_FR.failures, right_censored=as_FR.right_censored)
        self.X = as_XCN.X
        self.C = as_XCN.C
        self.N = as_XCN.N
        column_names = ['event time', 'censor code', 'number of events']
        self.__df = pd.DataFrame(data={'event time': self.X, 'censor code': self.C, 'number of events': self.N}, columns=column_names)
示例#3
0
    def __init__(self, path, **kwargs):
        """
        Read FNRN formatted data from an Excel file.

        The first two columns of the sheet are read as 'failures' and
        'number of failures'. When the sheet has four or more columns, the
        third and fourth are read as 'right censored' and 'number of right
        censored'. Any columns beyond the fourth are ignored and a warning
        is printed. NaN entries are stripped from each column using
        removeNaNs.

        Inputs:
        path - the file to read. Passed to pandas.read_excel as io.
        kwargs - forwarded unchanged to pandas.read_excel.

        Raises ValueError if the sheet has fewer than 2 columns, has exactly
        3 columns (one of the right censored pair is missing), or if the two
        columns of a pair have different lengths.
        """
        df = pd.read_excel(io=path, **kwargs)
        cols = df.columns
        n_cols = len(cols)

        # check the column count up front so a malformed sheet gives a clear
        # message rather than an IndexError from cols[1] or cols[3]
        if n_cols < 2:
            raise ValueError("xlsx_to_FNRN assumes the first and second columns in the excel file are 'failures' and 'number of failures'. Both columns are required.")
        if n_cols == 3:
            raise ValueError("xlsx_to_FNRN assumes the third and fourth columns in the excel file are 'right censored' and 'number of right censored'. Only three columns were found so one of these is missing.")

        failures = removeNaNs(df[cols[0]].to_numpy())
        num_failures = removeNaNs(df[cols[1]].to_numpy())
        if len(failures) != len(num_failures):
            raise ValueError("xlsx_to_FNRN assumes the first and second columns in the excel file are 'failures' and 'number of failures'. These must be the same length.")
        if n_cols == 2:
            right_censored = None
            num_right_censored = None
        else:
            right_censored = removeNaNs(df[cols[2]].to_numpy())
            num_right_censored = removeNaNs(df[cols[3]].to_numpy())
            if len(right_censored) != len(num_right_censored):
                raise ValueError("xlsx_to_FNRN assumes the third and fourth columns in the excel file are 'right censored' and 'number of right censored'. These must be the same length.")
        if n_cols > 4:
            colorprint("WARNING: xlsx_to_FNRN assumes the first four columns in the excel file are 'failures', 'number of failures', 'right censored', 'number of right censored'. All other columns have been ignored", text_color='red')

        # converting to FR and back again merges any duplicated rows that were not summarised in the input data
        FR = FNRN_to_FR(failures=failures, num_failures=num_failures, right_censored=right_censored, num_right_censored=num_right_censored)
        FNRN = FR_to_FNRN(failures=FR.failures, right_censored=FR.right_censored)
        self.failures = FNRN.failures
        self.num_failures = FNRN.num_failures
        self.right_censored = FNRN.right_censored
        self.num_right_censored = FNRN.num_right_censored

        # make the dataframe for printing and writing to excel
        if self.right_censored is not None:
            f, nf = list(self.failures), list(self.num_failures)
            rc, nrc = list(self.right_censored), list(self.num_right_censored)
            # the failure pair and the right censored pair may differ in
            # length, so the shorter pair is padded with empty strings
            max_len = max(len(f), len(rc))
            pad_f = [''] * (max_len - len(f))
            pad_rc = [''] * (max_len - len(rc))
            f.extend(pad_f)
            nf.extend(pad_f)
            rc.extend(pad_rc)
            nrc.extend(pad_rc)
            Data = {'failures': f, 'number of failures': nf, 'right censored': rc, 'number of right censored': nrc}
            self.__df = pd.DataFrame(Data, columns=['failures', 'number of failures', 'right censored', 'number of right censored'])
        else:
            Data = {'failures': self.failures, 'number of failures': self.num_failures}
            self.__df = pd.DataFrame(Data, columns=['failures', 'number of failures'])
def Probability_of_failure(stress,
                           strength,
                           show_distribution_plot=True,
                           print_results=True,
                           warn=True):
    """
    This function is deprecated.
    Please use reliability.Other_functions.stress_strength
    It is the same function just in a different location with a different name.

    A deprecation warning is printed in red, then every argument is passed
    unchanged to stress_strength.

    Returns:
    whatever stress_strength returns for the same arguments, so that callers
    of the deprecated name receive the same result as callers of the new one.
    """
    warning_str = "DeprecationWarning: reliability.Stress_strength.Probability_of_failure was moved and renamed to reliability.Other_functions.stress_strength in version 0.5.5. Your function has still been run, however, this module will be fully deprecated in March 2021."
    colorprint(warning_str, text_color="red")
    # the result must be handed back, otherwise this wrapper always returns None
    return stress_strength(
        stress=stress,
        strength=strength,
        show_distribution_plot=show_distribution_plot,
        print_results=print_results,
        warn=warn,
    )
    def __init__(self,
                 distribution,
                 data,
                 significance=0.05,
                 bins=None,
                 print_results=True,
                 show_plot=True):
        """
        Chi-squared goodness of fit test of data against a hypothesised distribution.

        The data is binned into a histogram and the observed counts are
        compared with the counts expected from the CDF of the distribution
        evaluated at the same bin edges.

        Inputs:
        distribution - a distribution object of one of the eight types checked below.
        data - a list or array of values.
        significance - must satisfy 0 < significance <= 0.5. Default is 0.05.
        bins - a list or array of bin edges, or a string naming a numpy bin edge method.
            Default is None which uses 'auto'.
        print_results - if True the statistic, critical value and hypothesis are printed.
        show_plot - if True a cumulative histogram of the data is plotted with the CDF of the distribution.

        Attributes set:
        bin_edges - the bin edges that were used (these may differ from the bins input if it was reset to 'auto').
        chisquared_statistic
        chisquared_critical_value
        hypothesis - 'ACCEPT' or 'REJECT'

        Raises ValueError if an input fails validation or if there are too
        few bins for the number of parameters in the distribution.
        """

        # ensure the input is a distribution object
        if type(distribution) not in [
                Weibull_Distribution, Normal_Distribution,
                Lognormal_Distribution, Exponential_Distribution,
                Gamma_Distribution, Beta_Distribution,
                Loglogistic_Distribution, Gumbel_Distribution
        ]:
            raise ValueError(
                'distribution must be a probability distribution object from the reliability.Distributions module. First define the distribution using Reliability.Distributions.___'
            )

        # only the Normal and Gumbel distributions may have data and bin edges below 0
        negatives_allowed = type(distribution) in [
            Normal_Distribution, Gumbel_Distribution
        ]

        # ensure data is a list or array
        if type(data) not in [list, np.ndarray]:
            raise ValueError('data must be a list or array')
        if min(data) < 0 and not negatives_allowed:
            raise ValueError(
                'data contains values below 0 which is not appropriate when the distribution is not a Normal or Gumbel Distribution'
            )

        if significance <= 0 or significance > 0.5:
            raise ValueError(
                'significance should be between 0 and 0.5. Default is 0.05 which gives 95% confidence'
            )

        if bins is None:
            bins = 'auto'
        if type(bins) not in [str, list, np.ndarray]:
            raise ValueError(
                'bins must be a list or array of the bin edges OR a string for the bin edge method from numpy. String options are auto, fd, doane, scott, stone, rice, sturges, or sqrt. For more information see the numpy documentation on numpy.histogram_bin_edges'
            )

        # get a histogram of the data to find the observed values.
        # The normed keyword is not passed because it no longer exists in
        # numpy.histogram. The default already returns counts.
        observed, bin_edges = np.histogram(data, bins=bins)

        if sum(observed) != len(data):
            colorprint('WARNING: the bins do not encompass all of the data',
                       text_color='red')
            colorprint(str('data range: ' + str(min(data)) + ' to ' +
                           str(max(data))),
                       text_color='red')
            colorprint(str('bins range: ' + str(min(bin_edges)) + ' to ' +
                           str(max(bin_edges))),
                       text_color='red')
            observed, bin_edges = np.histogram(data, bins='auto')
            colorprint('bins has been reset to "auto".', text_color='red')
            colorprint(str('The new bins are: ' + str(bin_edges) + '\n'),
                       text_color='red')

        # an error will result if bins contains values below 0 for anything
        # but the Normal or Gumbel Distributions. The test is on the lowest
        # edge, so a single negative edge is enough to trigger the reset.
        if min(bin_edges) < 0 and not negatives_allowed:
            observed, bin_edges = np.histogram(data, bins='auto')
            colorprint(
                'WARNING: The specified bins contained values below 0. This is not appropriate when the distribution is not a Normal or Gumbel Distribution. bins has been reset to "auto".',
                text_color='red')
            # bin_edges is an array so it must be converted before joining it to the message
            colorprint(str('The new bins are: ' + str(bin_edges)),
                       text_color='red')

        cdf = distribution.CDF(xvals=bin_edges, show_plot=False)
        cdf_diff = np.diff(cdf) / sum(
            np.diff(cdf))  # this ensures the sum is 1
        expected = len(data) * cdf_diff

        n = len(observed)
        parameters = distribution.parameters
        if parameters[
                -1] == 0:  # if the gamma parameter is 0 then adjust the number of parameters to ignore gamma
            k = len(parameters) - 1
        else:
            k = len(parameters)
        if n - k - 1 <= 0:
            raise ValueError(
                str('The length of bins is insufficient. Using a ' +
                    str(distribution.name2) +
                    ' distribution, the minimum acceptable length of bins is '
                    + str(k + 2)))

        self.bin_edges = bin_edges
        self.chisquared_statistic, _ = ss.chisquare(f_obs=observed,
                                                    f_exp=expected,
                                                    ddof=k)
        self.chisquared_critical_value = ss.chi2.ppf(1 - significance,
                                                     df=n - k - 1)
        if self.chisquared_statistic < self.chisquared_critical_value:
            self.hypothesis = 'ACCEPT'
        else:
            self.hypothesis = 'REJECT'

        if print_results is True:
            print('Chi-squared statistic:', self.chisquared_statistic)
            print('Chi-squared critical value:',
                  self.chisquared_critical_value)
            print('At the', significance, 'significance level, we can',
                  self.hypothesis, 'the hypothesis that the data comes from a',
                  distribution.param_title_long)

        if show_plot is True:
            plt.figure('Chi-squared test')
            # infinite bin edges cannot be drawn so they are replaced by finite values for the plot only
            bin_edges_to_plot = np.nan_to_num(x=bin_edges,
                                              posinf=max(data) * 1000,
                                              neginf=min(data))
            plt.hist(x=data,
                     bins=bin_edges_to_plot,
                     density=True,
                     cumulative=True,
                     color='lightgrey',
                     edgecolor='k',
                     linewidth=0.5,
                     label='Cumulative Histogram')
            distribution.CDF(label=distribution.param_title_long)
            plt.title(
                'Chi-squared test\nHypothesised distribution CDF vs cumulative histogram of data'
            )
            xmax = max(distribution.quantile(0.9999), max(data))
            xmin = min(distribution.quantile(0.0001), min(data))
            if xmin > 0 and xmin / (
                    xmax -
                    xmin) < 0.05:  # if xmin is near zero then set it to zero
                xmin = 0
            plt.xlim(xmin, xmax)
            plt.ylim(0, 1.1)
            plt.legend()
            plt.subplots_adjust(top=0.9)
            plt.show()
    def __init__(self,
                 MTBF=None,
                 number_of_failures=None,
                 CI=None,
                 test_duration=None,
                 one_sided=True,
                 time_terminated=True,
                 print_results=True):
        """
        Solve for one of MTBF, number_of_failures, CI or test_duration given the other three.

        Exactly one of the four main inputs must be left as None. That input
        is calculated from the chi-squared distribution and all four values
        are stored on the object.

        Inputs:
        MTBF - the lower confidence bound on the MTBF.
        number_of_failures - a non-negative whole number.
        CI - must satisfy 0.5 <= CI < 1.
        test_duration - the duration of the test.
        one_sided - True (default) for a one sided interval, False for two sided.
        time_terminated - True (default) for a time terminated test, False for a failure terminated test.
        print_results - if True the results are printed.

        Attributes set:
        test_duration, MTBF, number_of_failures, CI

        When solving for number_of_failures the result is the largest number
        of failures for which the lower confidence bound on the MTBF is still
        at least the specified MTBF.

        Raises ValueError if an input fails validation, if the number of
        unspecified inputs is not exactly one, or if the specified MTBF can
        not be achieved within the specified test_duration.
        """

        print_CI_warn = False  # used later if the CI is calculated
        if CI is not None:
            if CI < 0.5 or CI >= 1:
                raise ValueError(
                    'CI must be between 0.5 and 1. For example, specify CI=0.95 for 95% confidence interval'
                )
            if one_sided is True:
                CI_adj = CI
            else:
                CI_adj = 1 - ((1 - CI) / 2)

        # p is the extra degrees of freedom added for a time terminated test
        if time_terminated is True:
            p = 2
        elif time_terminated is False:
            p = 0
        else:
            raise ValueError(
                'time_terminated must be True or False. Default is True for the time terminated test (a test stopped after a set time rather than after a set number of failures).'
            )

        if one_sided is True:
            sides = 1
        elif one_sided is False:
            sides = 2
        else:
            raise ValueError(
                'one_sided must be True or False. Default is True for the one sided confidence interval.'
            )

        if print_results not in [True, False]:
            raise ValueError(
                'print_results must be True or False. Default is True.')

        if number_of_failures is not None:
            if number_of_failures % 1 != 0 or number_of_failures < 0:
                raise ValueError(
                    'number_of_failures must be a positive integer')

        if MTBF is None and number_of_failures is not None and CI is not None and test_duration is not None:
            soln_type = 'MTBF'
            MTBF = (2 * test_duration) / ss.chi2.ppf(
                CI_adj, 2 * number_of_failures + p)

        elif MTBF is not None and number_of_failures is None and CI is not None and test_duration is not None:
            soln_type = 'failures'
            if MTBF <= 0:
                # the search below only stops once the achievable MTBF drops below the target, which never happens for a target of 0 or less
                raise ValueError('MTBF must be greater than 0.')

            # The fewest failures a test can end with is 0 when time
            # terminated and 1 when failure terminated. With p=0 and 0
            # failures the chi-squared distribution has 0 degrees of
            # freedom, so the feasibility check would be skipped.
            min_failures = 0 if time_terminated is True else 1
            MTBF_check = (2 * test_duration) / ss.chi2.ppf(
                CI_adj, 2 * min_failures + p
            )  # checks that the maximum possible MTBF is within the test_duration
            if MTBF_check < MTBF:
                raise ValueError(
                    'The specified MTBF is not possible given the specified test_duration. You must increase your test_duration or decrease your MTBF.'
                )

            number_of_failures = min_failures
            while True:  # this requires an iterative search. Increments by 1 until the MTBF requirement is no longer met
                result = (2 * test_duration) / ss.chi2.ppf(
                    CI_adj, 2 * number_of_failures + p) - MTBF
                if result < 0:  # a negative result indicates too many failures
                    break
                number_of_failures += 1
            # the loop stops on the first count that has too many failures, so step back to the last count that met the requirement
            number_of_failures -= 1

        elif MTBF is not None and number_of_failures is not None and CI is None and test_duration is not None:
            soln_type = 'CI'
            CI_calc = ss.chi2.cdf(test_duration / (MTBF * 0.5),
                                  2 * number_of_failures + p)
            if one_sided is True:
                CI = CI_calc
            else:
                CI = 1 - (
                    2 * (1 - CI_calc)
                )  # this can give negative numbers, but only when the inputs result in an impossible CI.
            if CI < 0.5:
                print_CI_warn = True

        elif MTBF is not None and number_of_failures is not None and CI is not None and test_duration is None:
            soln_type = 'test_duration'
            test_duration = ss.chi2.ppf(CI_adj,
                                        2 * number_of_failures + p) * MTBF / 2

        elif MTBF is not None and number_of_failures is not None and CI is not None and test_duration is not None:
            raise ValueError(
                'All inputs were specified. Nothing to calculate.')

        else:
            raise ValueError(
                'More than one input was not specified. You must specify any 3 out of the 4 inputs (not including one_sided or print_results) and the remaining input will be calculated.'
            )

        self.test_duration = test_duration
        self.MTBF = MTBF
        self.number_of_failures = number_of_failures
        self.CI = CI
        if print_results is True:
            if time_terminated is True:
                print(
                    '\nReliability Test Planner results for time-terminated test'
                )
            else:
                print(
                    '\nReliability Test Planner results for failure-terminated test'
                )
            if soln_type == 'MTBF':
                print('Solving for MTBF')
            elif soln_type == 'failures':
                print('Solving for number_of_failures')
            elif soln_type == 'CI':
                print('Solving for CI')
            else:
                print('Solving for test_duration')
            print('Test duration:', self.test_duration)
            print('MTBF (lower confidence bound):', self.MTBF)
            print('Number of failures:', self.number_of_failures)
            print(
                str('Confidence interval (' + str(sides) + ' sided): ' +
                    str(self.CI)))
            if print_CI_warn is True:
                colorprint(
                    'WARNING: The calculated CI is less than 0.5. This indicates that the desired MTBF is unachievable for the specified test_duration and number_of_failures.',
                    text_color='red')
def sample_size_no_failures(reliability,
                            CI=0.95,
                            lifetimes=1,
                            weibull_shape=1,
                            print_results=True):
    '''
    This is used to determine the sample size required for a test in which no failures are expected, and the desired
    outcome is the lower bound on the reliability based on the sample size and desired confidence interval.

    inputs:
    reliability - lower bound on product reliability (between 0 and 1, exclusive)
    CI - confidence interval of result (0.5 <= CI < 1). Defaults to 0.95 for 95% CI.
    lifetimes - if testing the product for multiple lifetimes then more failures are expected so a smaller sample
        size will be required to demonstrate the desired reliability (assuming no failures). Conversely, if testing for
        less than one full lifetime then a larger sample size will be required. Must be greater than 0. Default is 1.
    weibull_shape - if the weibull shape (beta) of the failure mode is known, specify it here. Must be greater
        than 0. Otherwise leave the default of 1 for the exponential distribution.
    print_results - if True the results will be printed to the console.

    returns:
    number of items required in the test. This will always be an integer (rounded up).

    raises:
    ValueError if CI, reliability, weibull_shape or lifetimes is outside its valid range.
    '''
    if CI < 0.5 or CI >= 1:
        raise ValueError('CI must be between 0.5 and 1')
    if reliability <= 0 or reliability >= 1:
        raise ValueError('Reliability must be between 0 and 1')
    if weibull_shape <= 0:  # zero is rejected as well, in line with the error message
        raise ValueError(
            'Weibull shape must be greater than 0. Default (exponential distribution) is 1. If unknown then use 1.'
        )
    if lifetimes <= 0:
        raise ValueError(
            'lifetimes must be >0. Default is 1. No more than 5 is recommended due to test feasibility.'
        )
    if lifetimes > 5:
        print(
            'Testing for greater than 5 lifetimes is highly unlikely to result in zero failures.'
        )

    n = int(
        np.ceil((np.log(1 - CI)) /
                (lifetimes**weibull_shape *
                 np.log(reliability))))  # rounds up to nearest integer

    if print_results is True:
        # rounding removes floating point artefacts such as 0.57 * 100 = 56.99999999999999
        CI_rounded = round(CI * 100, 10)
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI_rounded)
        lifetime_string = 'lifetimes.' if lifetimes != 1 else 'lifetime.'

        colorprint('Results from sample_size_no_failures:',
                   bold=True,
                   underline=True)
        print('To achieve the desired reliability of', reliability, 'with a',
              str(str(CI_rounded) + '%'),
              'lower confidence bound, the required sample size to test is', n,
              'items.\n')
        print('This result is based on a specified weibull shape parameter of',
              weibull_shape, 'and an equivalent test duration of', lifetimes,
              lifetime_string)
        print(
            'If there are any failures during this test, then the desired lower confidence bound will not be achieved.'
        )
        print(
            'If this occurs, use the function Reliability_testing.one_sample_proportion to determine the lower and upper bounds on reliability.'
        )

    return n
def Probability_of_failure_normdist(stress=None,
                                    strength=None,
                                    show_distribution_plot=True,
                                    print_results=True,
                                    warn=True):
    '''
    Stress - Strength Interference for two Normal Distributions.
    Finds the probability of failure (the probability that stress > strength) using the exact formula, which is only
    valid when both stress and strength are Normal Distributions.

    Inputs:
    stress - a Normal_Distribution object
    strength - a Normal_Distribution object
    show_distribution_plot - True/False (default is True). If True the two PDFs and their overlap are plotted.
    print_results - True/False (default is True). If True the probability of failure is printed.
    warn - if True (default) a warning is printed when stress.mean > strength.mean, as the distributions may have
        been assigned to the wrong variables.

    Returns:
    the probability of failure

    Raises:
    ValueError if stress or strength is not a Normal_Distribution
    '''
    for candidate in (stress, strength):
        if type(candidate) is not Normal_Distribution:
            raise ValueError(
                'Both stress and strength must be a Normal_Distribution. If you need another distribution then use Probability_of_failure rather than Probability_of_failure_normdist'
            )
    if stress.mean > strength.mean and warn == True:
        colorprint(
            'WARNING: The mean of the stress distribution is above the mean of the strength distribution. Please check you have assigned stress and strength to the correct variables. To supress this warning set warn=False',
            text_color='red')

    # exact solution: the difference of two normal variables is itself normally distributed
    combined_sigma = (strength.sigma**2 + stress.sigma**2)**0.5
    prob_of_failure = ss.norm.cdf(-(strength.mu - stress.mu) / combined_sigma)

    if print_results is True:
        print('Probability of failure:', prob_of_failure)

    if show_distribution_plot is not True:
        return prob_of_failure

    previous_xlims = plt.xlim(auto=None)  # read before anything is drawn
    x_lower = stress.quantile(0.00001)
    x_upper = strength.quantile(0.99999)
    xvals = np.linspace(x_lower, x_upper, 1000)
    stress_PDF = stress.PDF(xvals=xvals, show_plot=False)
    strength_PDF = strength.PDF(xvals=xvals, show_plot=False)
    plt.plot(xvals, stress_PDF, label='Stress')
    plt.plot(xvals, strength_PDF, label='Strength')

    # the lower of the two curves at each x value is the upper boundary of the overlap region
    overlap = [
        min(strength_y, stress_y)
        for strength_y, stress_y in zip(strength_PDF, stress_PDF)
    ]
    peak = overlap.index(max(overlap))  # the overlap is highest where the two curves cross
    plt.fill_between(xvals,
                     np.zeros_like(xvals),
                     overlap,
                     color='salmon',
                     alpha=1,
                     linewidth=0,
                     linestyle='--')
    shading = dict(alpha=0.3, linewidth=0, linestyle='--')
    # region between the curves to the left of the crossing point
    plt.fill_between(xvals[:peak],
                     strength_PDF[:peak],
                     stress_PDF[:peak],
                     color='steelblue',
                     **shading)
    # region between the curves to the right of the crossing point
    plt.fill_between(xvals[peak:],
                     stress_PDF[peak:],
                     strength_PDF[peak:],
                     color='darkorange',
                     **shading)

    legend_title = str('Probability of\nfailure = ' +
                       str(round_to_decimals(prob_of_failure, 4)))
    plt.legend(title=legend_title)
    plt.title('Stress - Strength Interference Plot')
    plt.ylabel('Probability Density')
    plt.xlabel('Stress and Strength Units')
    plt.subplots_adjust(left=0.15, right=0.93)
    if previous_xlims == (0, 1):
        # the limits found before plotting were (0, 1) so they are replaced
        plt.xlim(stress.b5, strength.b95, auto=None)
    else:
        # widen the limits found before plotting where needed, never narrow them
        plt.xlim(min(stress.b5, previous_xlims[0]),
                 max(strength.b95, previous_xlims[1]),
                 auto=None)
    plt.ylim(bottom=0, auto=None)

    return prob_of_failure
def sequential_samling_chart(
    p1,
    p2,
    alpha,
    beta,
    show_plot=True,
    print_results=True,
    test_results=None,
    max_samples=100,
):
    """
    sequential_sampling_chart

    Tabulates, and optionally plots, the accept/reject boundaries of a sequential sampling test for a given set of quality and risk
    levels. If supplied, the test results are also plotted on the chart.

    NOTE: the function name is misspelled ("samling"); it is kept unchanged so existing callers keep working.

    A sequential sampling chart provides decision boundaries so that a success/failure test may be stopped as soon as there have been
    enough successes or enough failures to exceed the decision boundary. The decision boundary is calculated based on four parameters;
    producer's quality, consumer's quality, producer's risk, and consumer's risk.

    Inputs:
    p1 - producer_quality. The acceptable failure rate for the producer (typical around 0.01)
    p2 - consumer_quality. The acceptable failure rate for the consumer (typical around 0.1)
    alpha - producer_risk. Producer's CI = 1-alpha (typically 0.05)
    beta - consumer_risk. Consumer's CI = 1-beta (typically 0.1)
    test_results - array or list of binary test results with 1 as a failure and 0 as a success. eg. [0,0,0,1] for 3 successes and
        1 failure. Default=None
    show_plot - True/False. Defaults to True.
    print_results - True/False. Defaults to True.
    max_samples - the x_lim of the plot and the last row of the table. optional input. Default=100.

    Outputs:
    The sequential sampling chart - only produced when show_plot is True. test_results are only plotted if provided as an input.
    results - a dataframe with columns "Samples", "Failures to accept" and "Failures to reject". An entry is "x" where no decision is
        possible at that sample count (negative accept count, or a reject count larger than the number of samples).

    Raises:
    ValueError - if test_results is not None, a list or an array, or if it contains anything other than 0 and 1.
    """
    binary_error = "test_results must be a binary array or list with 1 as failures and 0 as successes. eg. [0 0 0 1] for 3 successes and 1 failure."
    if test_results is None:
        F = None
    elif isinstance(test_results, (list, np.ndarray)):
        F = np.asarray(test_results)
        # Validate up front, independent of show_plot. The previous check, all(F) not in [0, 1],
        # could never fail because all() always returns a bool.
        if not np.isin(F, [0, 1]).all():
            raise ValueError(binary_error)
    else:
        raise ValueError(binary_error)

    # slope (s) and intercepts (h1, h2) of the two straight decision lines
    a = 1 - alpha
    b = 1 - beta
    d = np.log(p2 / p1) + np.log((1 - p1) / (1 - p2))
    h1 = np.log((1 - a) / b) / d
    h2 = np.log((1 - b) / a) / d
    s = np.log((1 - p1) / (1 - p2)) / d

    xvals = np.arange(max_samples + 1)
    rejection_line = s * xvals - h1
    acceptance_line = s * xvals + h2
    acceptance_line[acceptance_line < 0] = 0  # clipped copy used only for shading

    upper_line = np.ones_like(xvals) * (s * max_samples - h1)
    lower_line_range = np.linspace(-h2 / s, max_samples, max_samples + 1)
    acceptance_line2 = (
        s * lower_line_range + h2
    )  # this is the visible part of the line that starts beyond x=0

    acceptance_array = np.floor(s * xvals + h2).astype(int)
    rejection_array = np.ceil(s * xvals - h1).astype(int)
    # a rejection criterion larger than the number of samples tested can never be met
    rejection_array[rejection_array > xvals] = -1

    # Build the "x" placeholders before creating the DataFrame. Assigning a string into an
    # integer column through df.loc is a deprecated incompatible-dtype assignment in pandas.
    data = {
        "Samples": xvals,
        "Failures to accept": [
            count if count >= 0 else "x" for count in acceptance_array.tolist()
        ],
        "Failures to reject": [
            count if count >= 0 else "x" for count in rejection_array.tolist()
        ],
    }
    df = pd.DataFrame(
        data, columns=["Samples", "Failures to accept", "Failures to reject"])

    if print_results is True:
        colorprint("Results from sequential_sampling_chart:",
                   bold=True,
                   underline=True)
        print(df.to_string(index=False), "\n")

    if show_plot is True:
        # plots the results of tests if they are specified
        if F is not None:
            nx = []
            ny = []
            failure_count = 0
            sample_count = 0
            for f in F:
                # every sample moves one step to the right
                sample_count += 1
                nx.append(sample_count)
                ny.append(failure_count)
                if f == 1:
                    # a failure adds a vertical step at the same sample count
                    failure_count += 1
                    nx.append(sample_count)
                    ny.append(failure_count)
            plt.plot(nx, ny, label="test results")

        # plots the decision boundaries and shades the areas red and green
        plt.plot(lower_line_range,
                 acceptance_line2,
                 linestyle="--",
                 color="green")
        plt.plot(xvals, rejection_line, linestyle="--", color="red")
        plt.fill_between(
            xvals,
            rejection_line,
            upper_line,
            color="red",
            alpha=0.3,
            label="Reject sample",
        )
        plt.fill_between(
            xvals,
            acceptance_line,
            rejection_line,
            color="gray",
            alpha=0.1,
            label="Keep Testing",
        )
        plt.fill_between(
            lower_line_range,
            0,
            acceptance_line2,
            color="green",
            alpha=0.3,
            label="Accept Sample",
        )
        plt.ylim([0, max(rejection_line)])
        plt.xlim([0, max(xvals)])
        plt.xlabel("Number of samples tested")
        plt.ylabel("Number of failures from samples tested")
        plt.title("Sequential sampling decision boundaries")
        plt.legend()
        plt.show()
    return df
    def __init__(
        self,
        MTBF=None,
        number_of_failures=None,
        CI=None,
        test_duration=None,
        one_sided=True,
        time_terminated=True,
        print_results=True,
    ):
        """
        Solve for whichever one of MTBF, number_of_failures, CI or
        test_duration is left as None, using the chi-squared relationship
        between the other three.

        Parameters
        ----------
        MTBF : float, int, optional
            The MTBF (reported as a lower confidence bound). Must be > 0.
        number_of_failures : int, optional
            The number of failures during the test. Must be a whole number
            >= 0.
        CI : float, optional
            The confidence interval. Must satisfy 0.5 <= CI < 1.
        test_duration : float, int, optional
            The duration of the test. Must be > 0.
        one_sided : bool, optional
            True for a one sided confidence interval, False for two sided.
            Default = True.
        time_terminated : bool, optional
            True for a time terminated test, False for a failure terminated
            test. Default = True.
        print_results : bool, optional
            If True the results are printed to the console. Default = True.

        Raises
        ------
        ValueError
            If an input is out of range, if all four quantities are given, if
            more than one is missing, or if the requested MTBF cannot be
            demonstrated within test_duration even with zero failures.

        Notes
        -----
        The four quantities (given or calculated) are stored on the instance
        as test_duration, MTBF, number_of_failures and CI.
        """
        if CI is not None and (CI < 0.5 or CI >= 1):
            raise ValueError(
                "CI must be between 0.5 and 1. For example, specify CI=0.95 for 95% confidence interval"
            )

        # p is the extra degrees of freedom added to 2*number_of_failures
        if time_terminated is True:
            p = 2
        elif time_terminated is False:
            p = 0
        else:
            raise ValueError(
                "time_terminated must be True or False. Default is True for the time terminated test (a test stopped after a set time rather than after a set number of failures)."
            )

        if one_sided is True:
            sides = 1
        elif one_sided is False:
            sides = 2
        else:
            raise ValueError(
                "one_sided must be True or False. Default is True for the one sided confidence interval."
            )

        if print_results not in [True, False]:
            raise ValueError(
                "print_results must be True or False. Default is True.")

        if number_of_failures is not None:
            if number_of_failures % 1 != 0 or number_of_failures < 0:
                raise ValueError(
                    "number_of_failures must be a non-negative integer")
        # Without these checks a non-positive MTBF makes the iterative search for
        # number_of_failures run forever, and non-positive inputs produce negative results.
        if MTBF is not None and MTBF <= 0:
            raise ValueError("MTBF must be greater than 0")
        if test_duration is not None and test_duration <= 0:
            raise ValueError("test_duration must be greater than 0")

        # exactly one of the four quantities must be left for the solver
        missing = [
            name for name, value in (
                ("MTBF", MTBF),
                ("number_of_failures", number_of_failures),
                ("CI", CI),
                ("test_duration", test_duration),
            ) if value is None
        ]
        if not missing:
            raise ValueError(
                "All inputs were specified. Nothing to calculate.")
        if len(missing) > 1:
            raise ValueError(
                "More than one input was not specified. You must specify any 3 out of the 4 inputs (not including one_sided or print_results) and the remaining input will be calculated."
            )
        unknown = missing[0]

        CI_adj = None
        if CI is not None:
            # a two sided interval splits the risk equally between both tails
            CI_adj = CI if sides == 1 else 1 - ((1 - CI) / 2)

        print_CI_warn = False  # set when the calculated CI is below 0.5
        if unknown == "MTBF":
            MTBF = (2 * test_duration) / ss.chi2.ppf(
                CI_adj, 2 * number_of_failures + p)

        elif unknown == "number_of_failures":
            # the highest MTBF that can be demonstrated is the one for 0 failures
            MTBF_check = (2 * test_duration) / ss.chi2.ppf(CI_adj, p)
            if MTBF_check < MTBF:
                raise ValueError(
                    "The specified MTBF is not possible given the specified test_duration. You must increase your test_duration or decrease your MTBF."
                )
            # iterative search: begins at 0 and increments by 1 until the
            # demonstrated MTBF falls below the requirement
            number_of_failures = 0
            while True:
                result = (2 * test_duration) / ss.chi2.ppf(
                    CI_adj, 2 * number_of_failures + p) - MTBF
                if result < 0:  # too many failures
                    break
                number_of_failures += 1
            number_of_failures -= 1  # step back to the last count that still met the MTBF

        elif unknown == "CI":
            CI_calc = ss.chi2.cdf(test_duration / (MTBF * 0.5),
                                  2 * number_of_failures + p)
            if one_sided is True:
                CI = CI_calc
            else:
                # this can give negative numbers, but only when the inputs
                # result in an impossible CI
                CI = 1 - (2 * (1 - CI_calc))
            if CI < 0.5:
                print_CI_warn = True

        else:  # unknown == "test_duration"
            test_duration = ss.chi2.ppf(CI_adj,
                                        2 * number_of_failures + p) * MTBF / 2

        self.test_duration = test_duration
        self.MTBF = MTBF
        self.number_of_failures = number_of_failures
        self.CI = CI
        if print_results is True:
            if time_terminated is True:
                print(
                    "\nReliability Test Planner results for time-terminated test:"
                )
            else:
                print(
                    "\nReliability Test Planner results for failure-terminated test:"
                )
            print("Solving for " + unknown)
            print("Test duration:", self.test_duration)
            print("MTBF (lower confidence bound):", self.MTBF)
            print("Number of failures:", self.number_of_failures)
            print(
                str("Confidence interval (" + str(sides) + " sided): " +
                    str(self.CI)))
            if print_CI_warn is True:
                colorprint(
                    "WARNING: The calculated CI is less than 0.5. This indicates that the desired MTBF is unachievable for the specified test_duration and number_of_failures.",
                    text_color="red",
                )
示例#11
0
    def __init__(self,
                 cost_PM,
                 cost_CM,
                 weibull_alpha,
                 weibull_beta,
                 show_time_plot=True,
                 show_ratio_plot=True,
                 print_results=True,
                 q=0,
                 **kwargs):
        """
        Find the replacement time that minimises the cost per unit time.

        Parameters
        ----------
        cost_PM : float, int
            Cost of preventative maintenance. Must not exceed cost_CM.
        cost_CM : float, int
            Cost of corrective maintenance.
        weibull_alpha : float, int
            Weibull scale parameter used in the cost model.
        weibull_beta : float, int
            Weibull shape parameter used in the cost model. A warning is
            printed if it is below 1.
        show_time_plot : bool or axes, optional
            True to plot cost per unit time against replacement time in a new
            figure, or a SubplotBase instance to draw on. Default = True.
        show_ratio_plot : bool or axes, optional
            True to plot the replacement interval against the CM/PM cost ratio
            in a new figure, or a SubplotBase instance to draw on.
            Default = True.
        print_results : bool, optional
            If True the results are printed to the console. Default = True.
        q : int, optional
            0 for "as good as new", 1 for "as good as old". Default = 0.
        kwargs
            "color" sets the colour of the time plot (default "steelblue").
            The remaining keywords are passed to plt.plot for the cost curve.

        Raises
        ------
        ValueError
            If cost_PM > cost_CM or q is neither 0 nor 1.

        Notes
        -----
        The results are stored as self.ORT (optimal replacement time) and
        self.min_cost (minimum cost per unit time).
        """
        line_color = kwargs.pop("color", "steelblue")
        if cost_PM > cost_CM:
            raise ValueError(
                "Cost_PM must be less than Cost_CM otherwise preventative maintenance should not be conducted."
            )
        if weibull_beta < 1:
            colorprint(
                "WARNING: weibull_beta is < 1 so the hazard rate is decreasing, therefore preventative maintenance should not be conducted.",
                text_color="red",
            )

        if q == 0:  # as good as new
            alpha_multiple = 3
            t = np.linspace(1, weibull_alpha * alpha_multiple, 10000)

            def survival(x):
                # Weibull survival function
                return np.exp(-((x / weibull_alpha)**weibull_beta))

            def survival_integral(x):
                # integral of the survival function from 0 to x
                return integrate.quad(survival, 0, x)[0]

            # evaluate both element by element over the time grid
            sf = np.vectorize(survival)(t)
            integral = np.vectorize(survival_integral)(t)

            CPUT = (cost_PM * sf + cost_CM * (1 - sf)) / integral
            cheapest = np.argmin(CPUT)
            min_cost = CPUT[cheapest]  # minimum cost per unit time
            ORT = t[cheapest]  # optimal replacement time
        elif q == 1:  # as good as old
            alpha_multiple = 4
            t = np.linspace(1, weibull_alpha * alpha_multiple, 100000)

            def cost_rate(x):
                # cost per unit time when replacing at time x
                return ((cost_PM *
                         (x / weibull_alpha)**weibull_beta) + cost_CM) / x

            CPUT = cost_rate(t)
            # closed form optimum for this model
            ORT = weibull_alpha * ((cost_CM /
                                    (cost_PM *
                                     (weibull_beta - 1)))**(1 / weibull_beta))
            min_cost = cost_rate(ORT)
        else:
            raise ValueError(
                'q must be 0 or 1. Default is 0. Use 0 for "as good as new" and use 1 for "as good as old".'
            )
        self.ORT = ORT
        self.min_cost = min_cost
        min_cost_rounded = round_to_decimals(min_cost, 2)
        ORT_rounded = round_to_decimals(ORT, 2)

        if print_results is True:
            colorprint("Results from optimal_replacement_time:",
                       bold=True,
                       underline=True)
            if q == 0:
                print("Cost model assuming as good as new replacement (q=0):")
            else:
                print("Cost model assuming as good as old replacement (q=1):")
            print(
                "The minimum cost per unit time is",
                min_cost_rounded,
                "\nThe optimal replacement time is",
                ORT_rounded,
            )

        time_axes_given = issubclass(type(show_time_plot), SubplotBase)
        if show_time_plot is True or time_axes_given:
            if time_axes_given:
                plt.sca(ax=show_time_plot)  # draw on the axes passed
            else:
                plt.figure()  # no axes passed so make a new figure
            plt.plot(t, CPUT, color=line_color, **kwargs)
            plt.plot(ORT, min_cost, "o", color=line_color)
            annotation = ("\nMinimum cost per unit time is " +
                          str(min_cost_rounded) +
                          "\nOptimal replacement time is " + str(ORT_rounded))
            plt.text(ORT, min_cost, annotation, va="top")
            plt.xlabel("Replacement time")
            plt.ylabel("Cost per unit time")
            plt.title("Optimal replacement time estimation")
            plt.ylim([0, min_cost * 2])
            plt.xlim([0, weibull_alpha * alpha_multiple])

        ratio_axes_given = issubclass(type(show_ratio_plot), SubplotBase)
        if show_ratio_plot is True or ratio_axes_given:
            if ratio_axes_given:
                plt.sca(ax=show_ratio_plot)  # draw on the axes passed
            else:
                plt.figure()  # no axes passed so make a new figure
            xupper = np.round(cost_CM / cost_PM, 0) * 2
            CC_CP = np.linspace(1, xupper, 200)  # cost CM / cost PM
            CC = CC_CP * cost_PM

            # optimal replacement time for a given corrective maintenance cost
            if q == 1:

                def ort_for_cost(x):
                    return weibull_alpha * (
                        (x /
                         (cost_PM * (weibull_beta - 1)))**(1 / weibull_beta))
            else:  # q = 0

                def ort_for_cost(x):
                    return t[np.argmin((cost_PM * sf + x *
                                        (1 - sf)) / integral)]

            ORT_array = np.vectorize(ort_for_cost)(CC)

            plt.plot(CC_CP, ORT_array)
            plt.xlim(1, xupper)
            plt.ylim(0, self.ORT * 2)
            plt.scatter(cost_CM / cost_PM, self.ORT)
            # place the label below a rising curve and above a falling one
            if ORT_array[50] > ORT_array[40]:
                va, mult = "top", 0.95
            else:
                va, mult = "bottom", 1.05
            label = ("$cost_{CM} = $" + str(cost_CM) + "\n$cost_{PM} = $" +
                     str(cost_PM) + "\nInterval = " +
                     str(round_to_decimals(self.ORT, 2)))
            plt.text(
                s=label,
                x=cost_CM / cost_PM * 1.05,
                y=self.ORT * mult,
                ha="left",
                va=va,
            )
            plt.xlabel(r"Cost ratio $\left(\frac{CM}{PM}\right)$")
            plt.ylabel("Replacement Interval")
            plt.title(
                "Optimal replacement interval\nacross a range of CM costs")
示例#12
0
def reliability_test_duration(
    MTBF_required,
    MTBF_design,
    consumer_risk,
    producer_risk,
    one_sided=True,
    time_terminated=True,
    show_plot=True,
    print_results=True,
):
    """
    Calculate the duration a reliability test needs in order to meet the
    specified producer's and consumer's risks, given the required MTBF and the
    design MTBF. For details please see the
    `algorithm <https://reliability.readthedocs.io/en/latest/Reliability%20test%20duration.html#how-does-the-algorithm-work>`_.

    Parameters
    ----------
    MTBF_required : float, int
        The MTBF that the equipment must demonstrate during the test.
    MTBF_design : float, int
        The MTBF the producer is designing to. Must exceed MTBF_required.
    consumer_risk : float
        The probability that a bad product is accepted as a good product by
        the consumer. Must be greater than 0 and no more than 0.5.
    producer_risk : float
        The probability that a good product is rejected as a bad product by
        the consumer. Must be greater than 0 and no more than 0.5.
    one_sided : bool, optional
        Whether the confidence interval used is one sided (True) or two sided
        (False). Default = True.
    time_terminated : bool, optional
        Whether the test is time terminated (True) or failure terminated
        (False). Default = True.
    show_plot : bool
        If True, a plot of risk vs test duration is created. Default = True.
    print_results : bool, optional
        If True, the results are printed to the console. Default = True.

    Returns
    -------
    test_duration : float
        The required test duration to meet the input parameters.

    Raises
    ------
    ValueError
        If any input fails the range or True/False checks described above.

    Notes
    -----
    The number of failures allowed is calculated but not returned, since the
    test itself determines the actual number of failures.

    If the plot does not show automatically, use plt.show() to show it.
    """

    for risk_name, risk_value in (
        ("consumer_risk", consumer_risk),
        ("producer_risk", producer_risk),
    ):
        if risk_value <= 0 or risk_value > 0.5:
            raise ValueError(risk_name + " must be between 0 and 0.5")
    if MTBF_design <= MTBF_required:
        raise ValueError("MTBF_design must exceed MTBF_required")
    for flag_name, flag_value in (
        ("one_sided", one_sided),
        ("time_terminated", time_terminated),
        ("show_plot", show_plot),
        ("print_results", print_results),
    ):
        if flag_value not in [True, False]:
            raise ValueError(flag_name +
                             " must be True or False. Default is True")

    duration_array = []
    producer_risk_array = []
    failures = 0  # incremented each iteration
    solution_index = False  # replaced once the solution is found
    max_failures = 1e10  # replaced once the solution is found
    solution_found = False
    time_start = time.time()
    time_out = 10  # seconds until the first warning about long runtime
    while True:
        # test duration needed for the required MTBF at the consumer's risk
        required = reliability_test_planner(
            MTBF=MTBF_required,
            number_of_failures=failures,
            CI=1 - consumer_risk,
            one_sided=one_sided,
            time_terminated=time_terminated,
            print_results=False,
        )
        # producer's risk for the design MTBF at that test duration
        designed = reliability_test_planner(
            MTBF=MTBF_design,
            number_of_failures=failures,
            test_duration=required.test_duration,
            one_sided=one_sided,
            time_terminated=time_terminated,
            print_results=False,
        )
        duration_array.append(required.test_duration)
        producer_risk_array.append(designed.CI)

        # the first time the producer's risk drops below the target marks the solution
        if solution_found is False and producer_risk_array[-1] < producer_risk:
            solution_index = failures - 1  # step back to the previous failure count
            max_failures = solution_index * 1.5  # keep going a little further past the solution
            solution_found = True
        if failures > max_failures:
            break
        failures += 1

        if time.time() - time_start > time_out:
            colorprint(
                str("WARNING: The algorithm is taking a long time to find the solution. This is probably because MTBF_required is too close to MTBF_design so the item struggles to pass the test. --- Current runtime: "
                    + str(int(round(time.time() - time_start, 0))) +
                    " seconds"),
                text_color="red",
            )
            time_out += 10  # warn again after another 10 seconds

    duration_solution = duration_array[solution_index]

    if print_results is True:
        if time_terminated is True:
            heading = "\nReliability Test Duration Solver for time-terminated test:"
        else:
            heading = "\nReliability Test Duration Solver for failure-terminated test:"
        print(heading)
        print("Required test duration:", duration_solution)
        print("Specified consumer's risk:", consumer_risk)
        print("Specified producer's risk:", producer_risk)
        print("Specified MTBF required by the consumer:", MTBF_required)
        print("Specified MTBF designed to by the producer:", MTBF_design)

    if show_plot is True:
        # horizontal line at the consumer's risk, same length as the duration data
        consumer_risk_array = np.ones_like(duration_array) * consumer_risk
        plt.plot(duration_array, producer_risk_array, label="Producer's risk")
        plt.plot(duration_array, consumer_risk_array, label="Consumer's risk")
        plt.scatter(
            duration_array,
            producer_risk_array,
            color="k",
            marker=".",
            label="Failure events",
        )

        plt.xlabel("Test duration")
        plt.ylabel("Risk")
        plt.legend(loc="upper right")
        if len(duration_array) > 1:
            plt.xlim(min(duration_array), max(duration_array))
        plt.axvline(x=duration_solution,
                    color="k",
                    linestyle="--",
                    linewidth=1)
        plt.title("Test duration vs Producer's and Consumer's Risk")
        duration_label = " Test duration\n " + str(
            int(math.ceil(duration_solution)))
        plt.text(
            x=duration_solution,
            y=plt.ylim()[0],
            s=duration_label,
            va="bottom",
        )
    return duration_solution
示例#13
0
def one_sample_proportion(trials=None,
                          successes=None,
                          CI=0.95,
                          print_results=True):
    """
    Calculates the upper and lower bounds of reliability for a given number of
    trials and successes.

    Parameters
    ----------
    trials : int
        The number of trials which were conducted. Must be >= 0. NumPy
        integers are accepted.
    successes : int
        The number of trials which were successful. Must be between 0 and
        trials. NumPy integers are accepted.
    CI : float, optional
        The desired confidence interval. Must be strictly between 0 and 1.
        Default = 0.95 for 95% CI.
    print_results : bool, optional
        If True the results will be printed to the console. Default = True.

    Returns
    -------
    limits : tuple
        The confidence interval limits in the form (lower,upper). When
        successes is 0 or equal to trials a one sided interval is calculated,
        and the bound that cannot be estimated is returned as exactly 0
        (lower) or exactly 1 (upper).

    Raises
    ------
    ValueError
        If trials or successes is missing, not an integer, or negative, if
        successes exceeds trials, or if CI is not strictly between 0 and 1.
    """
    if trials is None or successes is None:
        raise ValueError(
            "You must specify the number of trials and successes.")
    # Types are checked before any comparison so that a wrong type gives the intended
    # ValueError. bool is a subclass of int but is still rejected, as it was before.
    if isinstance(trials, bool) or not isinstance(trials, (int, np.integer)):
        raise ValueError("trials must be an integer")
    if isinstance(successes,
                  bool) or not isinstance(successes, (int, np.integer)):
        raise ValueError("successes must be an integer")
    trials, successes = int(trials), int(successes)
    if trials < 0 or successes < 0:
        raise ValueError("trials and successes cannot be negative")
    if successes > trials:
        raise ValueError("successes cannot be greater than trials")
    # an out-of-range CI would otherwise silently produce nan and be reported as (0, 1)
    if not 0 < CI < 1:
        raise ValueError("CI must be between 0 and 1")

    # calculate 1 sided CI when every trial failed or every trial succeeded
    n = 1 if successes == 0 or successes == trials else 2

    V1_lower = 2 * successes
    V2_lower = 2 * (trials - successes + 1)
    alpha_lower = (1 - CI) / n
    F_lower = ss.f.ppf(alpha_lower, V1_lower, V2_lower)
    LOWER_LIM = (V1_lower * F_lower) / (V2_lower + V1_lower * F_lower)

    # with successes=0 the F quantile has 0 degrees of freedom and is nan; the bound is then 0
    LOWER_LIM = np.nan_to_num(LOWER_LIM, nan=0)
    if LOWER_LIM == 0:
        LOWER_LIM = int(0)

    V1_upper = 2 * (successes + 1)
    V2_upper = 2 * (trials - successes)
    alpha_upper = 1 - alpha_lower
    F_upper = ss.f.ppf(alpha_upper, V1_upper, V2_upper)
    UPPER_LIM = (V1_upper * F_upper) / (V2_upper + V1_upper * F_upper)

    # with successes=trials the F quantile has 0 degrees of freedom and is nan; the bound is then 1
    UPPER_LIM = np.nan_to_num(UPPER_LIM, nan=1)
    if UPPER_LIM == 1:
        UPPER_LIM = int(1)

    if print_results is True:
        # rounding removes float artefacts such as 0.29 * 100 = 28.999999999999996
        CI_rounded = round(CI * 100, 10)
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI_rounded)
        colorprint("Results from one_sample_proportion:",
                   bold=True,
                   underline=True)
        print(
            "For a test with",
            trials,
            "trials of which there were",
            successes,
            "successes and",
            trials - successes,
            "failures, the bounds on reliability are:",
        )
        print("Lower", str(str(CI_rounded) + "%"), "confidence bound:",
              LOWER_LIM)
        print("Upper", str(str(CI_rounded) + "%"), "confidence bound:",
              UPPER_LIM)

    return (LOWER_LIM, UPPER_LIM)
示例#14
0
def sequential_sampling_chart(
    p1,
    p2,
    alpha,
    beta,
    show_plot=True,
    print_results=True,
    test_results=None,
    max_samples=100,
):
    """
    This function plots the accept/reject boundaries for a given set of quality
    and risk levels. If supplied, the test results are also plotted on the
    chart.

    A sequential sampling chart provides decision boundaries so that a
    success/failure test may be stopped as soon as there have been enough
    successes or enough failures to exceed the decision boundary. The decision
    boundary is calculated based on four parameters; producer's quality,
    consumer's quality, producer's risk, and consumer's risk. Producer's risk
    is the chance that the consumer rejects a batch when they should have
    accepted it. Consumer's risk is the chance that the consumer accepts a batch
    when they should have rejected it. We can also consider the producer's and
    consumer's quality to be the desired reliability of the sample, and the
    producer's and consumer's risk to be 1-confidence interval that the sample
    test result matches the population test result.

    Parameters
    ----------
    p1 : float
        The producer's quality. This is the acceptable failure rate for the
        producer. Must be between 0 and 1 but is usually very small, typically
        around 0.01. Must not be equal to p2.
    p2 : float
        The consumer's quality. This is the acceptable failure rate for the
        consumer. Must be between 0 and 1 but is usually very small, typically
        around 0.1. Must not be equal to p1.
    alpha : float
        The producer's risk. The probability of the consumer rejecting a batch
        when it should have been accepted. Producer's CI = 1-alpha. Must be
        between 0 and 1 but is usually very small, typically 0.05.
    beta : float
        The consumer's risk. The probability of the consumer accepting a batch
        when it should have been rejected. Consumer's CI = 1-beta. Must be
        between 0 and 1 but is usually very small, typically 0.1.
    test_results : array, list, optional
        The binary test results. eg. [0,0,0,1] represents 3 successes and 1
        failure. Default=None. Use 0 for success and 1 for failure as this test
        is counting the number of failures.
    show_plot : bool, optional
        If True the plot will be produced. Default = True.
    print_results : bool, optional
        If True the results will be printed to the console. Default = True.
    max_samples : int, optional
        The upper x-limit of the plot. Default = 100.

    Returns
    -------
    results : dataframe
        A dataframe of tabulated decision results with the columns "Samples",
        "Failures to accept", "Failures to reject". Entries for which no
        decision is possible are shown as "x". This is independent of the
        test_results provided.

    Raises
    ------
    ValueError
        If test_results is not a list or array, if any of p1, p2, alpha, beta
        is not strictly between 0 and 1, if p1 equals p2, or (when plotting) if
        test_results contains a value other than 0 or 1.

    Notes
    -----
    If show_plot is True, the sequential sampling chart with decision boundaries
    will be produced. The test_results are only plotted on the chart if provided
    as an input. The chart will display automatically so plt.show() is not
    required.
    """
    test_results_error = "test_results must be a binary array or list with 1 as failures and 0 as successes. eg. [0 0 0 1] represents 3 successes and 1 failure."

    if isinstance(test_results, (list, np.ndarray)):
        F = np.asarray(test_results)
    elif test_results is None:
        F = None
    else:
        raise ValueError(test_results_error)

    if alpha <= 0 or alpha >= 1:
        raise ValueError("alpha must be between 0 and 1")
    if beta <= 0 or beta >= 1:
        raise ValueError("beta must be between 0 and 1")
    if p1 <= 0 or p1 >= 1:
        raise ValueError("p1 must be between 0 and 1")
    if p2 <= 0 or p2 >= 1:
        raise ValueError("p2 must be between 0 and 1")
    if p1 == p2:
        # d (below) would be zero, making the slope and intercepts NaN/inf
        raise ValueError(
            "p1 and p2 must be different. The decision boundaries are undefined when the producer's and consumer's quality are equal."
        )

    # slope (s) and intercepts (h1, h2) of the two parallel decision lines
    a = 1 - alpha
    b = 1 - beta
    d = np.log(p2 / p1) + np.log((1 - p1) / (1 - p2))
    h1 = np.log((1 - a) / b) / d
    h2 = np.log((1 - b) / a) / d
    s = np.log((1 - p1) / (1 - p2)) / d

    xvals = np.arange(max_samples + 1)
    rejection_line = s * xvals - h1
    acceptance_unclipped = s * xvals + h2

    # tabulated integer criteria; -1 marks "no decision possible"
    acceptance_array = np.floor(acceptance_unclipped).astype(int)
    rejection_array = np.ceil(rejection_line).astype(int)
    # rejecting needs more failures than samples tested so far -> impossible
    rejection_array[rejection_array > xvals] = -1

    data = {
        "Samples": xvals,
        "Failures to accept": acceptance_array,
        "Failures to reject": rejection_array,
    }
    df = pd.DataFrame(
        data, columns=["Samples", "Failures to accept", "Failures to reject"])
    for column in ("Failures to accept", "Failures to reject"):
        impossible = df[column] < 0
        # cast first so that writing the "x" marker does not rely on pandas
        # silently upcasting an integer column
        df[column] = df[column].astype(object)
        df.loc[impossible, column] = "x"

    if print_results is True:
        colorprint("Results from sequential_sampling_chart:",
                   bold=True,
                   underline=True)
        print(df.to_string(index=False), "\n")

    if show_plot is True:
        # plots the results of tests (as a step line) if they are specified
        if F is not None:
            nx = []
            ny = []
            failure_count = 0
            for sample_count, outcome in enumerate(F, start=1):
                if outcome not in (0, 1):
                    raise ValueError(test_results_error)
                nx.append(sample_count)
                ny.append(failure_count)
                if outcome == 1:
                    # vertical step up at this sample
                    failure_count += 1
                    nx.append(sample_count)
                    ny.append(failure_count)
            plt.plot(nx, ny, label="test results")

        # shading helpers
        acceptance_line = acceptance_unclipped.copy()
        acceptance_line[acceptance_line < 0] = 0
        upper_line = np.ones_like(xvals) * (s * max_samples - h1)
        # this is the visible part of the line that starts beyond x=0
        lower_line_range = np.linspace(-h2 / s, max_samples, max_samples + 1)
        acceptance_line2 = s * lower_line_range + h2

        # plots the decision boundaries and shades the areas red and green
        plt.plot(lower_line_range,
                 acceptance_line2,
                 linestyle="--",
                 color="green")
        plt.plot(xvals, rejection_line, linestyle="--", color="red")
        plt.fill_between(
            xvals,
            rejection_line,
            upper_line,
            color="red",
            alpha=0.3,
            label="Reject sample",
        )
        plt.fill_between(
            xvals,
            acceptance_line,
            rejection_line,
            color="gray",
            alpha=0.1,
            label="Keep Testing",
        )
        plt.fill_between(
            lower_line_range,
            0,
            acceptance_line2,
            color="green",
            alpha=0.3,
            label="Accept Sample",
        )
        plt.ylim([0, max(rejection_line)])
        plt.xlim([0, max(xvals)])
        plt.xlabel("Number of samples tested")
        plt.ylabel("Number of failures from samples tested")
        plt.title("Sequential sampling decision boundaries")
        plt.legend()
        plt.show()
    return df
示例#15
0
def sample_size_no_failures(reliability,
                            CI=0.95,
                            lifetimes=1,
                            weibull_shape=1,
                            print_results=True):
    """
    This is used to determine the sample size required for a test in which no
    failures are expected, and the desired outcome is the lower bound on the
    reliability based on the sample size and desired confidence interval.

    Parameters
    ----------
    reliability : float
        The lower bound on product reliability. Must be between 0 and 1.
    CI : float, optional
        The confidence interval of the result. Must be between 0.5 and 1 since
        a confidence less than 50% is not meaningful. Default = 0.95 for 95% CI.
    lifetimes : int, float, optional
        If testing the product for multiple lifetimes then more failures are
        expected so a smaller sample size will be required to demonstrate the
        desired reliability (assuming no failures). Conversely, if testing for
        less than one full lifetime then a larger sample size will be required.
        Default = 1. Must be greater than 0. No more than 5 is recommended due
        to test feasibility.
    weibull_shape : int, float, optional
        If the weibull shape (beta) of the failure mode is known, specify it
        here. Otherwise leave the default of 1 for the exponential distribution.
        Must be greater than 0.
    print_results : bool, optional
        If True the results will be printed to the console. Default = True.

    Returns
    -------
    n : int
        The number of items required in the test. This will always be an integer
        (rounded up).

    Raises
    ------
    ValueError
        If CI is outside [0.5, 1), reliability is not strictly between 0 and 1,
        weibull_shape is not greater than 0, or lifetimes is not greater than 0.
    """
    if CI < 0.5 or CI >= 1:
        raise ValueError("CI must be between 0.5 and 1")
    if reliability <= 0 or reliability >= 1:
        raise ValueError("Reliability must be between 0 and 1")
    # a shape of exactly 0 is rejected too, as the message has always stated
    if weibull_shape <= 0:
        raise ValueError(
            "Weibull shape must be greater than 0. Default (exponential distribution) is 1. If unknown then use 1."
        )
    if lifetimes > 5:
        print(
            "Testing for greater than 5 lifetimes is highly unlikely to result in zero failures."
        )
    if lifetimes <= 0:
        raise ValueError(
            "lifetimes must be >0. Default is 1. No more than 5 is recommended due to test feasibility."
        )

    # n = ln(1 - CI) / (lifetimes^shape * ln(reliability)), rounded up
    n = int(
        np.ceil(
            np.log(1 - CI) / (lifetimes**weibull_shape * np.log(reliability))))

    if print_results is True:
        CI_rounded = CI * 100
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI_rounded)
        lifetime_string = "lifetime." if lifetimes == 1 else "lifetimes."

        colorprint("Results from sample_size_no_failures:",
                   bold=True,
                   underline=True)
        print(
            "To achieve the desired reliability of",
            reliability,
            "with a",
            str(str(CI_rounded) + "%"),
            "lower confidence bound, the required sample size to test is",
            n,
            "items.\n",
        )
        print(
            "This result is based on a specified weibull shape parameter of",
            weibull_shape,
            "and an equivalent test duration of",
            lifetimes,
            lifetime_string,
        )
        print(
            "If there are any failures during this test, then the desired lower confidence bound will not be achieved."
        )
        print(
            "If this occurs, use the function Reliability_testing.one_sample_proportion to determine the lower and upper bounds on reliability."
        )

    return n
示例#16
0
    def __init__(self,
                 data,
                 CI=0.95,
                 print_results=True,
                 show_plot=True,
                 plot_CI=True,
                 **kwargs):
        """
        Compute the non-parametric Mean Cumulative Function (MCF) for one or
        more repairable systems and optionally print and plot it.

        For every system the values are sorted; the largest value is treated
        as that system's end of observation (state 'C') and all earlier values
        as repair/failure times (state 'F').

        Parameters
        ----------
        data : list, array
            Either a list/array of systems, each being a list/array of times
            (eg. [[1,3,5],[3,6,8]]), or a flat list/array of numbers for a
            single system. The caller's objects are not modified.
        CI : float, optional
            Confidence level for the bounds. Must be between 0 and 1
            (exclusive). Default = 0.95. The Z-value is taken as
            -norm.ppf(1 - CI), ie. a one-sided bound.
        print_results : bool, optional
            If True the results dataframe is printed. Default = True.
        show_plot : bool, optional
            If True the MCF step plot is drawn on the current axes.
            Default = True.
        plot_CI : bool, optional
            If True the confidence bounds are shaded on the plot.
            Default = True.
        kwargs
            Passed to plt.plot for the MCF line (eg. color, linestyle).

        Attributes set
        --------------
        results : dataframe with columns 'state', 'time', 'MCF_lower', 'MCF',
            'MCF_upper', 'variance' (censored rows hold empty strings).
        time, MCF, lower, upper, variance : lists holding the values for the
            failure ('F') rows only.

        Raises
        ------
        ValueError
            If data is not a list/array, contains unsupported or mixed item
            types, contains no failure times, or if CI is not between 0 and 1.
        """
        # --- normalise the input without modifying the caller's objects ---
        if isinstance(data, np.ndarray):
            data = list(data)
        elif not isinstance(data, list):
            raise ValueError('data must be a list or numpy array')

        # numpy scalars are accepted so that a 1-D array describes one system
        number_types = (int, float, np.integer, np.floating)
        systems = []
        item_is_number = []
        for item in data:
            if isinstance(item, (list, np.ndarray)):
                systems.append(sorted(item))  # sorted copy, ascending
                item_is_number.append(False)
            elif isinstance(item, number_types) and not isinstance(item, bool):
                item_is_number.append(True)
            else:
                raise ValueError(
                    'Each item in the data must be a list or numpy array. eg. data = [[1,3,5],[3,6,8]]'
                )
        if all(item_is_number):
            # a flat sequence of numbers is a single system
            systems = [sorted(data)]
        elif any(item_is_number):
            raise ValueError(
                'Mixed data types found in the data. Each item in the data must be a list or numpy array. eg. data = [[1,3,5],[3,6,8]].'
            )

        # NOTE(review): this message says "two sided" but Z below is
        # one-sided; the text is left unchanged for compatibility.
        if CI <= 0 or CI >= 1:
            raise ValueError(
                'CI must be between 0 and 1. Default is 0.95 for 95% confidence intervals (two sided).'
            )

        # the last (largest) value of each system is its end time, the rest
        # are repair times
        end_times = []
        repair_times = []
        for system in systems:
            if system:
                repair_times.extend(system[:-1])
                end_times.append(system[-1])

        # Because each system is sorted, the largest end time can never be
        # smaller than a repair time, so only the "no failures" case needs
        # guarding (it previously surfaced as an opaque max() ValueError).
        if not repair_times:
            raise ValueError(
                'data must contain at least one failure time in addition to the end time of each system.'
            )
        last_time = max(end_times)
        C_array = ['C'] * len(end_times)
        F_array = ['F'] * len(repair_times)

        Z = -ss.norm.ppf(1 - CI)  # confidence interval converted to Z-value

        # sort the inputs and extract the sorted values for later use
        times = np.hstack([repair_times, end_times])
        states = np.hstack([F_array, C_array])
        df = pd.DataFrame({'times': times, 'states': states},
                          columns=['times', 'states'])
        # sort by time and then by state (descending) so that where the same
        # time occurs F comes before C. This ensures a failure is counted
        # before the item is retired.
        df_sorted = df.sort_values(by=['times', 'states'],
                                   ascending=[True, False])
        times_sorted = df_sorted.times.values
        states_sorted = df_sorted.states.values

        # --- MCF calculations ---
        MCF_array = []
        Var_array = []
        MCF_lower_array = []
        MCF_upper_array = []
        r = len(end_times)  # number of systems still under observation
        prev_MCF = None  # MCF and variance at the previous failure, if any
        prev_var = None
        for state in states_sorted:
            if state == 'F':  # failure event
                r_inv = 1 / r
                var_step = (r_inv**2) * ((1 - r_inv)**2 + (r - 1) *
                                         (0 - r_inv)**2)
                if prev_MCF is None:  # first failure seen
                    MCF_i = r_inv
                    var_i = var_step
                else:  # accumulate onto the previous failure
                    MCF_i = r_inv + prev_MCF
                    var_i = var_step + prev_var
                spread = np.exp((Z * var_i**0.5) / MCF_i)
                MCF_array.append(MCF_i)
                Var_array.append(var_i)
                MCF_lower_array.append(MCF_i / spread)
                MCF_upper_array.append(MCF_i * spread)
                prev_MCF, prev_var = MCF_i, var_i
            else:  # censored event: one fewer system under observation
                r -= 1
                MCF_array.append('')
                Var_array.append('')
                MCF_lower_array.append('')
                MCF_upper_array.append('')

        # format output as dataframe
        data = {
            'state': states_sorted,
            'time': times_sorted,
            'MCF_lower': MCF_lower_array,
            'MCF': MCF_array,
            'MCF_upper': MCF_upper_array,
            'variance': Var_array
        }
        printable_results = pd.DataFrame(data,
                                         columns=[
                                             'state', 'time', 'MCF_lower',
                                             'MCF', 'MCF_upper', 'variance'
                                         ])
        self.results = printable_results

        # only the failure rows carry MCF values
        indices_to_drop = printable_results[printable_results['state'] ==
                                            'C'].index
        plotting_results = printable_results.drop(indices_to_drop,
                                                  inplace=False)
        RESULTS_time = plotting_results.time.values
        RESULTS_MCF = plotting_results.MCF.values
        RESULTS_variance = plotting_results.variance.values
        RESULTS_lower = plotting_results.MCF_lower.values
        RESULTS_upper = plotting_results.MCF_upper.values

        self.time = list(RESULTS_time)
        self.MCF = list(RESULTS_MCF)
        self.lower = list(RESULTS_lower)
        self.upper = list(RESULTS_upper)
        self.variance = list(RESULTS_variance)

        CI_rounded = CI * 100
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI * 100)

        if print_results is True:
            pd.set_option('display.width',
                          200)  # prevents wrapping after default 80 characters
            pd.set_option('display.max_columns',
                          9)  # shows the dataframe without ... truncation
            colorprint(str('Mean Cumulative Function results (' +
                           str(CI_rounded) + '% CI)'),
                       bold=True,
                       underline=True)
            print(self.results.to_string(index=False), '\n')

        if show_plot is True:
            # build the step function: horizontal to each failure time at the
            # previous level, then vertical up to the new level
            x_MCF = [0]
            y_MCF = [0]
            y_upper = [0]
            y_lower = [0]
            level, level_upper, level_lower = 0, 0, 0
            for t, m, up, lo in zip(RESULTS_time, RESULTS_MCF, RESULTS_upper,
                                    RESULTS_lower):
                x_MCF.extend([t, t])
                y_MCF.extend([level, m])
                y_upper.extend([level_upper, up])
                y_lower.extend([level_lower, lo])
                level, level_upper, level_lower = m, up, lo
            x_MCF.append(last_time)  # add the last horizontal line
            y_MCF.append(level)
            y_upper.append(level_upper)
            y_lower.append(level_lower)
            title_str = 'Non-parametric estimate of the Mean Cumulative Function'

            col = kwargs.pop('color', 'steelblue')
            if plot_CI is True:
                plt.fill_between(x_MCF,
                                 y_lower,
                                 y_upper,
                                 color=col,
                                 alpha=0.3,
                                 linewidth=0)
                title_str = str(title_str + '\nwith ' + str(CI_rounded) +
                                '% one-sided confidence interval bounds')
            plt.plot(x_MCF, y_MCF, color=col, **kwargs)
            plt.xlabel('Time')
            plt.ylabel('Mean cumulative number of failures')
            plt.title(title_str)
            plt.xlim(0, last_time)
            plt.ylim(0, max(RESULTS_upper) * 1.05)
示例#17
0
    def __init__(self,
                 data,
                 CI=0.95,
                 plot_CI=True,
                 print_results=True,
                 show_plot=True,
                 **kwargs):
        """
        Fit the model MCF = (t/alpha)^beta to the points produced by
        MCF_nonparametric, and optionally print and plot the fit.

        Parameters
        ----------
        data : list, array
            Passed unchanged to MCF_nonparametric, which supplies the
            (time, MCF) points that are fitted.
        CI : float, optional
            Confidence level for the parameter and plot bounds. Must be
            between 0 and 1 (exclusive). Default = 0.95.
        plot_CI : bool, optional
            If True the confidence bounds are shaded on the plot.
        print_results : bool, optional
            If True the fitted parameters and an interpretation of beta are
            printed.
        show_plot : bool, optional
            If True the fitted curve and the data points are plotted.
        kwargs
            'color', 'marker' and 'label' are consumed here; anything left is
            forwarded to both plt.plot and plt.scatter.

        Attributes set
        --------------
        times, MCF, alpha, alpha_SE, alpha_lower, alpha_upper, beta, beta_SE,
        beta_lower, beta_upper, cov_alpha_beta, results (dataframe).
        """
        if CI >= 1 or CI <= 0:
            raise ValueError(
                'CI must be between 0 and 1. Default is 0.95 for 95% Confidence interval.'
            )

        # the plot points come straight from the non-parametric estimate
        nonparametric = MCF_nonparametric(data=data,
                                          print_results=False,
                                          show_plot=False)
        self.times = nonparametric.time
        self.MCF = nonparametric.MCF

        # starting point: straight-line fit of ln(MCF) against ln(t)
        linear_fit = np.polyfit(np.log(self.times), np.log(self.MCF), deg=1)
        beta_guess = linear_fit[0]
        alpha_guess = np.exp(-linear_fit[1] / beta_guess)

        def power_law(t, a, b):  # model handed to curve_fit
            return (t / a)**b

        # refine the starting point; curve_fit also returns the covariance
        # matrix of the two parameters
        popt, pcov = curve_fit(power_law,
                               self.times,
                               self.MCF,
                               p0=[alpha_guess, beta_guess])
        alpha = popt[0]
        beta = popt[1]
        var_alpha = pcov[0][0]
        var_beta = pcov[1][1]
        cov_alpha_beta = pcov[0][1]

        Z = -ss.norm.ppf((1 - CI) / 2)
        self.alpha = alpha
        self.beta = beta
        self.alpha_SE = var_alpha**0.5
        self.beta_SE = var_beta**0.5
        self.cov_alpha_beta = cov_alpha_beta
        self.alpha_lower = self.alpha * (np.exp(-Z *
                                                (self.alpha_SE / self.alpha)))
        self.alpha_upper = self.alpha * (np.exp(Z *
                                                (self.alpha_SE / self.alpha)))
        self.beta_lower = self.beta * (np.exp(-Z * (self.beta_SE / self.beta)))
        self.beta_upper = self.beta * (np.exp(Z * (self.beta_SE / self.beta)))

        column_names = [
            'Parameter', 'Point Estimate', 'Standard Error', 'Lower CI',
            'Upper CI'
        ]
        table = {
            'Parameter': ['Alpha', 'Beta'],
            'Point Estimate': [self.alpha, self.beta],
            'Standard Error': [self.alpha_SE, self.beta_SE],
            'Lower CI': [self.alpha_lower, self.beta_lower],
            'Upper CI': [self.alpha_upper, self.beta_upper]
        }
        self.results = pd.DataFrame(table, columns=column_names)

        if print_results is True:
            CI_rounded = CI * 100
            if CI_rounded % 1 == 0:
                CI_rounded = int(CI * 100)
            colorprint(str('Mean Cumulative Function Parametric Model (' +
                           str(CI_rounded) + '% CI):'),
                       bold=True,
                       underline=True)
            print('MCF = (t/α)^β')
            print(self.results.to_string(index=False), '\n')
            # interpret beta from where its confidence interval sits
            # relative to 1
            if self.beta_upper <= 1:
                verdict = 'Since Beta is less than 1, the system repair rate is IMPROVING over time.'
            elif self.beta_lower < 1 and self.beta_upper > 1:
                verdict = 'Since Beta is approximately 1, the system repair rate is remaining CONSTANT over time.'
            else:
                verdict = 'Since Beta is greater than 1, the system repair rate is WORSENING over time.'
            print(verdict)

        if show_plot is True:
            # pull out the keywords handled here; the rest go to matplotlib
            color = kwargs.pop('color', 'steelblue')
            marker_color = 'k'
            marker = kwargs.pop('marker', '.')
            label = kwargs.pop('label',
                               r'$\hat{MCF} = (\frac{t}{\alpha})^\beta$')

            x_line = np.linspace(0.001, max(self.times) * 10, 1000)
            y_line = (x_line / alpha)**beta
            plt.plot(x_line, y_line, color=color, label=label, **kwargs)

            if plot_CI is True:
                # p1 and p2 weight the alpha and beta (co)variances when
                # forming the variance of the fitted curve
                p1 = -(beta / alpha) * (x_line / alpha)**beta
                p2 = ((x_line / alpha)**beta) * np.log(x_line / alpha)
                var = var_alpha * p1**2 + var_beta * p2**2 + 2 * p1 * p2 * cov_alpha_beta
                SD = var**0.5
                y_line_lower = y_line * np.exp((-Z * SD) / y_line)
                y_line_upper = y_line * np.exp((Z * SD) / y_line)
                plt.fill_between(x_line,
                                 y_line_lower,
                                 y_line_upper,
                                 color=color,
                                 alpha=0.3,
                                 linewidth=0)

            plt.scatter(self.times,
                        self.MCF,
                        marker=marker,
                        color=marker_color,
                        **kwargs)
            title_str = str(
                'Parametric estimate of the Mean Cumulative Function\n' +
                r'$MCF = (\frac{t}{\alpha})^\beta$ with α=' +
                str(round(alpha, 4)) + ', β=' + str(round(beta, 4)))
            plt.xlabel('Time')
            plt.ylabel('Mean cumulative number of failures')
            plt.xlim(0, max(self.times) * 1.2)
            plt.ylim(0, max(self.MCF) * 1.4)
            plt.title(title_str)
示例#18
0
    def __init__(self,
                 times=None,
                 xmax=None,
                 target_MTBF=None,
                 show_plot=True,
                 print_results=True,
                 **kwargs):
        """
        Fit a reliability growth model to failure times and optionally print
        and plot the result.

        A straight line is fitted to ln(times / N) against ln(times), where N
        is the cumulative failure count. The slope and intercept give beta
        (1 - slope) and Lambda (exp(-intercept)), which are then used to plot
        t^(1-beta) / (Lambda*beta), labelled "Instantaneous MTBF".

        Parameters
        ----------
        times : list, array
            The failure times. Must not contain negative values.
        xmax : int, float, optional
            The upper x-limit of the fitted line. Default = 1.5 * max(times)
            truncated to an integer.
        target_MTBF : int, float, optional
            If specified, the time needed to reach this MTBF is calculated and
            stored in time_to_target.
        show_plot : bool, optional
            If True the plot is drawn on the current axes. Default = True.
        print_results : bool, optional
            If True the model parameters (and the time to reach the target, if
            one was given) are printed. Default = True.
        kwargs
            'color' is consumed here; anything else is passed to plt.plot for
            the fitted line.

        Attributes set
        --------------
        Lambda, Beta : the fitted model parameters.
        time_to_target : the time to reach target_MTBF, or an explanatory
            string if no target was given.

        Raises
        ------
        ValueError
            If times is not a list or array, or contains negative values.
        """
        if times is None or not isinstance(times, (list, np.ndarray)):
            raise ValueError("times must be an array or list of failure times")
        times = np.sort(np.asarray(times))
        if min(times) < 0:
            raise ValueError(
                "failure times cannot be negative. times must be an array or list of failure times"
            )
        if xmax is None:
            xmax = int(max(times) * 1.5)
        c = kwargs.pop("color", "steelblue")

        N = np.arange(1, len(times) + 1)  # cumulative number of failures
        theta_c = times / N
        # fit a straight line to the data to get the parameters lambda and beta
        z = np.polyfit(np.log(times), np.log(theta_c), 1)
        beta = 1 - z[0]
        Lambda = np.exp(-z[1])
        self.Lambda = Lambda
        self.Beta = beta

        if target_MTBF is not None:
            t_target = (target_MTBF * Lambda * beta)**(1 / (1 - beta))
            self.time_to_target = t_target
        else:
            self.time_to_target = "specify a target to obtain the time_to_target"

        if print_results is True:
            colorprint("Reliability growth model parameters:",
                       bold=True,
                       underline=True)
            print("lambda:", Lambda)
            print("beta:", beta)
            # previously printed even when print_results was False
            if target_MTBF is not None:
                print("Time to reach target MTBF:", t_target)

        if show_plot is True:
            xvals = np.linspace(0, xmax, 1000)
            theta_i = (xvals**(1 - beta)) / (Lambda * beta)  # the smooth line
            # the failure times highlighted along the line
            theta_i_points = (times**(1 - beta)) / (Lambda * beta)
            plt.plot(xvals, theta_i, color=c, **kwargs)
            plt.plot(times, theta_i_points, "o", color=c, alpha=0.5)
            if target_MTBF is not None:
                plt.plot(
                    [0, t_target, t_target],
                    [target_MTBF, target_MTBF, 0],
                    "red",
                    label="Reliability target",
                    linewidth=1,
                )
            plt.title("Reliability Growth")
            plt.xlabel("Total time on test")
            plt.ylabel("Instantaneous MTBF")
            plt.xlim([0, max(xvals)])
            plt.ylim([0, max(theta_i) * 1.2])
def two_proportion_test(
    sample_1_trials=None,
    sample_1_successes=None,
    sample_2_trials=None,
    sample_2_successes=None,
    CI=0.95,
    print_results=True,
):
    """
    Calculates whether the difference in test results between two samples is statistically significant. For example, assume we have
    a poll of respondents in which 27/40 people agreed, and another poll in which 42/80 agreed. This test will determine if the difference
    is statistically significant for the given sample sizes at the specified confidence level.

    inputs:
    sample_1_trials - number of trials in the first sample
    sample_1_successes - number of successes in the first sample
    sample_2_trials - number of trials in the second sample
    sample_2_successes - number of successes in the second sample
    CI - desired confidence interval. Defaults to 0.95 for 95% CI.
    print_results - if True the results will be printed to the console.

    returns:
    lower,upper,result - lower and upper are bounds on the difference. If the bounds do not include 0 then it is a statistically significant difference.
    """
    if CI < 0.5 or CI >= 1:
        raise ValueError("CI must be between 0.5 and 1. Default is 0.95")
    if (sample_1_trials is None or sample_1_successes is None
            or sample_2_trials is None or sample_2_successes is None):
        raise ValueError(
            "You must specify the number of trials and successes for both samples."
        )
    if sample_1_successes > sample_1_trials or sample_2_successes > sample_2_trials:
        raise ValueError("successes cannot be greater than trials")
    p1 = sample_1_successes / sample_1_trials
    p2 = sample_2_successes / sample_2_trials
    diff = p1 - p2
    Z = ss.norm.ppf(1 - ((1 - CI) / 2))
    k = (Z * ((p1 * (1 - p1) / sample_1_trials) +
              (p2 * (1 - p2) / sample_2_trials))**0.5)
    lower = diff - k
    upper = diff + k
    if lower < 0 and upper > 0:
        result = "non-significant"
        contains_zero_string = "contain 0"
    else:
        result = "significant"
        contains_zero_string = "do not contain 0"

    CI_rounded = CI * 100
    if CI_rounded % 1 == 0:
        CI_rounded = int(CI_rounded)

    if print_results is True:
        colorprint("Results from two_proportion_test:",
                   bold=True,
                   underline=True)
        print(
            "Sample 1 test results (successes/tests):",
            str(str(sample_1_successes) + "/" + str(sample_1_trials)),
        )
        print(
            "Sample 2 test results (successes/tests):",
            str(str(sample_2_successes) + "/" + str(sample_2_trials)),
        )
        print(
            "The",
            str(str(CI_rounded) + "%"),
            "confidence bounds on the difference in these results is:",
            lower,
            "to",
            upper,
        )
        print(
            "Since the confidence bounds",
            contains_zero_string,
            "the result is statistically",
            str(result + "."),
        )

    return lower, upper, result
示例#20
0
    def __init__(self,
                 data,
                 CI=0.95,
                 plot_CI=True,
                 print_results=True,
                 show_plot=True,
                 **kwargs):
        """
        Fit the power-law model MCF = (t/alpha)^beta to the nonparametric
        mean cumulative function obtained from the data.

        Inputs:
        data - passed unchanged to MCF_nonparametric, which supplies the plotting points
        CI - confidence level, strictly between 0 and 1. Default is 0.95
        plot_CI - True/False. If True the confidence band is shaded on the plot
        print_results - True/False. If True the fitted parameters and a trend statement are printed
        show_plot - True/False. If True the fitted curve and the points are drawn with matplotlib
        kwargs - "color", "marker" and "label" are consumed here; anything left is
                 forwarded to both plt.plot and plt.scatter

        Outputs (attributes):
        times, MCF - the nonparametric plotting points
        alpha, beta - fitted parameters
        alpha_SE, beta_SE - standard errors of the parameters
        cov_alpha_beta - covariance between the parameters
        alpha_lower, alpha_upper, beta_lower, beta_upper - confidence bounds on the parameters
        results - DataFrame summarising the above
        """
        if CI >= 1 or CI <= 0:
            raise ValueError(
                "CI must be between 0 and 1. Default is 0.95 for 95% Confidence interval."
            )

        # the plotting points (time, MCF) all come from the nonparametric estimate
        nonparametric = MCF_nonparametric(data=data,
                                          print_results=False,
                                          show_plot=False)
        self.times = nonparametric.time
        self.MCF = nonparametric.MCF

        # Starting values for the optimizer come from a straight-line fit of
        # ln(MCF) against ln(t). The values are passed through list() first
        # because of a numpy issue with taking the log of these floats.
        log_t = np.log(list(self.times))
        log_mcf = np.log(list(self.MCF))
        slope, intercept = np.polyfit(log_t, log_mcf, deg=1)
        start = [np.exp(-intercept / slope), slope]  # [alpha, beta]

        def power_law(t, a, b):  # model handed to curve_fit
            return (t / a)**b

        # curve_fit refines the least squares guess and also returns the
        # variance-covariance matrix of the parameters
        params, covariance = curve_fit(power_law,
                                       self.times,
                                       self.MCF,
                                       p0=start)
        alpha = params[0]
        beta = params[1]
        var_alpha = covariance[0][0]
        var_beta = covariance[1][1]
        cov_alpha_beta = covariance[0][1]

        Z = -ss.norm.ppf((1 - CI) / 2)
        self.alpha = alpha
        self.beta = beta
        self.alpha_SE = var_alpha**0.5
        self.beta_SE = var_beta**0.5
        self.cov_alpha_beta = cov_alpha_beta
        # bounds are formed on the log scale so they stay positive
        self.alpha_upper = self.alpha * (np.exp(Z * (self.alpha_SE / self.alpha)))
        self.alpha_lower = self.alpha * (np.exp(-Z * (self.alpha_SE / self.alpha)))
        self.beta_upper = self.beta * (np.exp(Z * (self.beta_SE / self.beta)))
        self.beta_lower = self.beta * (np.exp(-Z * (self.beta_SE / self.beta)))

        column_order = [
            "Parameter",
            "Point Estimate",
            "Standard Error",
            "Lower CI",
            "Upper CI",
        ]
        table = {
            "Parameter": ["Alpha", "Beta"],
            "Point Estimate": [self.alpha, self.beta],
            "Standard Error": [self.alpha_SE, self.beta_SE],
            "Lower CI": [self.alpha_lower, self.beta_lower],
            "Upper CI": [self.alpha_upper, self.beta_upper],
        }
        self.results = pd.DataFrame(table, columns=column_order)

        if print_results is True:
            percent = CI * 100
            if percent % 1 == 0:
                percent = int(percent)  # drop the trailing ".0"
            colorprint(
                str("Mean Cumulative Function Parametric Model (" +
                    str(percent) + "% CI):"),
                bold=True,
                underline=True,
            )
            print("MCF = (t/α)^β")
            print(self.results.to_string(index=False), "\n")
            # the trend statement depends on where the beta bounds sit relative to 1
            if self.beta_upper <= 1:
                trend_message = "Since Beta is less than 1, the system repair rate is IMPROVING over time."
            elif self.beta_lower < 1 and self.beta_upper > 1:
                trend_message = "Since Beta is approximately 1, the system repair rate is remaining CONSTANT over time."
            else:
                trend_message = "Since Beta is greater than 1, the system repair rate is WORSENING over time."
            print(trend_message)

        if show_plot is True:
            # styling keywords handled here are removed from kwargs before the
            # remainder is forwarded to matplotlib
            color = kwargs.pop("color") if "color" in kwargs else "steelblue"
            marker_color = "k"
            marker = kwargs.pop("marker") if "marker" in kwargs else "."
            if "label" in kwargs:
                label = kwargs.pop("label")
            else:
                label = r"$\hat{MCF} = (\frac{t}{\alpha})^\beta$"

            x_line = np.linspace(0.001, max(self.times) * 10, 1000)
            y_line = (x_line / alpha)**beta
            plt.plot(x_line, y_line, color=color, label=label, **kwargs)

            if plot_CI is True:
                # partial derivatives of the MCF with respect to alpha and beta
                d_alpha = -(beta / alpha) * (x_line / alpha)**beta
                d_beta = ((x_line / alpha)**beta) * np.log(x_line / alpha)
                variance = (var_alpha * d_alpha**2 + var_beta * d_beta**2 +
                            2 * d_alpha * d_beta * cov_alpha_beta)
                spread = variance**0.5
                band_lower = y_line * np.exp((-Z * spread) / y_line)
                band_upper = y_line * np.exp((Z * spread) / y_line)
                plt.fill_between(
                    x_line,
                    band_lower,
                    band_upper,
                    color=color,
                    alpha=0.3,
                    linewidth=0,
                )

            plt.scatter(self.times,
                        self.MCF,
                        marker=marker,
                        color=marker_color,
                        **kwargs)
            plt.ylabel("Mean cumulative number of failures")
            plt.xlabel("Time")
            heading = "Parametric estimate of the Mean Cumulative Function\n"
            equation = r"$MCF = (\frac{t}{\alpha})^\beta$ with α="
            title_str = (heading + equation + str(round(alpha, 4)) + ", β=" +
                         str(round(beta, 4)))
            plt.xlim(0, max(self.times) * 1.2)
            plt.ylim(0, max(self.MCF) * 1.4)
            plt.title(title_str)
    def __init__(self,
                 distribution,
                 data,
                 significance=0.05,
                 print_results=True,
                 show_plot=True):
        """
        Kolmogorov-Smirnov goodness of fit test of data against a hypothesised distribution.

        Inputs:
        distribution - a distribution object (Weibull, Normal, Lognormal, Exponential, Gamma, Beta, Loglogistic or Gumbel)
        data - list or numpy array of values. Negative values are only allowed for Normal and Gumbel distributions
        significance - significance level, greater than 0 and at most 0.5. Default is 0.05
        print_results - True/False. If True the statistic, critical value and conclusion are printed
        show_plot - True/False. If True the hypothesised CDF and the empirical CDF are plotted and plt.show() is called

        Outputs (attributes):
        KS_statistic - the largest gap between the hypothesised CDF and the empirical CDF
        KS_critical_value - critical value from scipy's kstwo distribution
        hypothesis - "ACCEPT" if the statistic is below the critical value, otherwise "REJECT"
        """
        # only exact distribution classes from the Distributions module are accepted
        supported_types = (
            Weibull_Distribution,
            Normal_Distribution,
            Lognormal_Distribution,
            Exponential_Distribution,
            Gamma_Distribution,
            Beta_Distribution,
            Loglogistic_Distribution,
            Gumbel_Distribution,
        )
        if type(distribution) not in supported_types:
            raise ValueError(
                "distribution must be a probability distribution object from the reliability.Distributions module. First define the distribution using Reliability.Distributions.___"
            )

        allows_negatives = (Normal_Distribution, Gumbel_Distribution)
        if min(data) < 0 and type(distribution) not in allows_negatives:
            raise ValueError(
                "data contains values below 0 which is not appropriate when the distribution is not a Normal or Gumbel Distribution"
            )

        if significance <= 0 or significance > 0.5:
            raise ValueError(
                "significance should be between 0 and 0.5. Default is 0.05 which gives 95% confidence"
            )

        # the empirical CDF below relies on the data being in ascending order
        if type(data) is list:
            data = np.array(data)
        elif type(data) is not np.ndarray:
            raise ValueError("data must be an array or list")
        data = np.sort(data)

        n = len(data)
        fitted_cdf = distribution.CDF(xvals=data, show_plot=False)

        ranks = np.arange(1, n + 1)  # 1 to n
        Sn = ranks / n  # empirical CDF just after each point
        Sn_1 = (ranks - 1) / n  # empirical CDF just before each point
        gap_after = abs(fitted_cdf - Sn)
        gap_before = abs(fitted_cdf - Sn_1)
        # the statistic is the largest gap on either side of any step
        self.KS_statistic = max(np.hstack([gap_after, gap_before]))
        self.KS_critical_value = ss.kstwo.ppf(q=1 - significance, n=n)

        self.hypothesis = ("ACCEPT" if
                           self.KS_statistic < self.KS_critical_value else
                           "REJECT")

        if print_results is True:
            colorprint("Results from Kolmogorov-Smirnov test:",
                       bold=True,
                       underline=True)
            print("Kolmogorov-Smirnov statistic:", self.KS_statistic)
            print("Kolmogorov-Smirnov critical value:", self.KS_critical_value)
            print(
                "At the",
                significance,
                "significance level, we can",
                self.hypothesis,
                "the hypothesis that the data comes from a",
                distribution.param_title_long,
            )

        if show_plot is True:
            plt.figure("Kolmogorov-Smirnov test")
            # Build the staircase: each data point contributes a vertical riser
            # from the level before it to the level after it.
            levels = np.hstack([Sn_1, 1])
            step_x = [0]
            step_y = [0]
            for point, level_before, level_after in zip(
                    data, levels[:-1], levels[1:]):
                step_x += [point, point]
                step_y += [level_before, level_after]
            # run the final tread far to the right so it leaves the visible axes
            step_x.append(max(data) * 1000)
            step_y.append(1)

            distribution.CDF(label=distribution.param_title_long)
            plt.plot(step_x, step_y, label="Empirical CDF")
            xmax = max(distribution.quantile(0.9999), max(data))
            xmin = min(distribution.quantile(0.0001), min(data))
            # if xmin is near zero (relative to the range) then set it to zero
            if xmin > 0 and xmin / (xmax - xmin) < 0.05:
                xmin = 0
            plt.xlim(xmin, xmax)
            plt.ylim(0, 1.1)
            plt.title(
                "Kolmogorov-Smirnov test\nHypothesised distribution CDF vs empirical CDF of data"
            )
            plt.legend()
            plt.subplots_adjust(top=0.9)
            plt.show()
示例#22
0
    def __init__(self,
                 times=None,
                 xmax=None,
                 target_MTBF=None,
                 show_plot=True,
                 print_results=True,
                 **kwargs):
        """
        Uses the Duane method to find the instantaneous MTBF and produce a reliability growth plot.

        Inputs:
        times - array or list of failure times. All values must be greater than zero.
        xmax - xlim to plot up to. Default is 1.5*max(times)
        target_MTBF - specify the target MTBF to obtain the total time on test required to reach it.
        show_plot - True/False. Defaults to true. Other keyword arguments are passed to the plot for style
        print_results - True/False. Defaults to True.

        Outputs:
        If print_results is True it will print a summary of the results (including the time to reach target_MTBF when a target is given)
        Lambda - the lambda parameter from the Duane model
        Beta - the beta parameter from the Duane model
        time_to_target - Time to target is only returned if target_MTBF is specified. Otherwise it holds an explanatory string.
        If show_plot is True, it will plot the reliability growth. Use plt.show() to show the plot.

        Raises:
        ValueError - if times is missing, is not a list or array, or contains values that are negative or zero.
        """
        if times is None:
            raise ValueError("times must be an array or list of failure times")
        if type(times) == list:
            times = np.sort(np.array(times))
        elif type(times) == np.ndarray:
            times = np.sort(times)
        else:
            raise ValueError("times must be an array or list of failure times")
        if min(times) < 0:
            raise ValueError(
                "failure times cannot be negative. times must be an array or list of failure times"
            )
        if min(times) == 0:
            # the fit below takes log(times), which is undefined at zero
            raise ValueError(
                "failure times must be greater than zero. times must be an array or list of failure times"
            )
        if xmax is None:
            # NOTE(review): int() truncates, so when max(times)*1.5 < 1 this gives xmax = 0
            xmax = int(max(times) * 1.5)
        if "color" in kwargs:
            c = kwargs.pop("color")
        else:
            c = "steelblue"

        N = np.arange(1, len(times) + 1)  # cumulative failure count at each failure time
        theta_c = times / N  # cumulative MTBF
        ln_t = np.log(times)
        ln_theta_c = np.log(theta_c)
        z = np.polyfit(
            ln_t, ln_theta_c, 1
        )  # fit a straight line to the data to get the parameters lambda and beta
        beta = 1 - z[0]
        Lambda = np.exp(-z[1])
        xvals = np.linspace(0, xmax, 1000)
        theta_i = (xvals**(1 - beta)) / (Lambda * beta)  # the smooth line
        theta_i_points = (times**(1 - beta)) / (
            Lambda * beta)  # the failure times highlighted along the line
        self.Lambda = Lambda
        self.Beta = beta

        if target_MTBF is not None:
            # invert theta_i = t^(1-beta) / (Lambda*beta) for t
            t_target = (target_MTBF * Lambda * beta)**(1 / (1 - beta))
            self.time_to_target = t_target
        else:
            self.time_to_target = "specify a target to obtain the time_to_target"

        if print_results is True:
            colorprint("Reliability growth model parameters:",
                       bold=True,
                       underline=True)
            print("lambda:", Lambda)
            print("beta:", beta)
            # previously printed unconditionally; now silenced by print_results=False
            if target_MTBF is not None:
                print("Time to reach target MTBF:", t_target)

        if show_plot is True:
            plt.plot(xvals, theta_i, color=c, **kwargs)
            plt.plot(times, theta_i_points, "o", color=c, alpha=0.5)
            if target_MTBF is not None:
                plt.plot(
                    [0, t_target, t_target],
                    [target_MTBF, target_MTBF, 0],
                    "red",
                    label="Reliability target",
                    linewidth=1,
                )
            plt.title("Reliability Growth")
            plt.xlabel("Total time on test")
            plt.ylabel("Instantaneous MTBF")
            plt.xlim([0, max(xvals)])
            plt.ylim([0, max(theta_i) * 1.2])
示例#23
0
def Probability_of_failure(stress,
                           strength,
                           show_distribution_plot=True,
                           print_results=True,
                           warn=True):
    '''
    Stress - Strength Interference
    Given the probability distributions for stress and strength, this module will find the probability of failure due to stress-strength interference.
    Failure is defined as when stress>strength.
    The calculation is achieved using numerical integration of stress.PDF(x) * strength.CDF(x).

    Inputs:
    stress - a probability distribution from the Distributions module
    strength - a probability distribution from the Distributions module
    show_distribution_plot - True/False (default is True)
    print_results - True/False (default is True)
    warn - a warning will be issued if both stress and strength are Normal as you should use Probability_of_failure_normdist. You can supress this using warn=False
         - a warning will be issued if the stress.mean > strength.mean as the user may have assigned the distributions to the wrong variables. You can supress this using warn=False

    Returns:
    probability of failure

    Raises:
    ValueError - if stress or strength is not one of the supported distribution objects

    Example use:
    from reliability.Distributions import Weibull_Distribution, Gamma_Distribution
    stress = Weibull_Distribution(alpha=2,beta=3,gamma=1)
    strength = Gamma_Distribution(alpha=2,beta=3,gamma=3)
    Probability_of_failure(stress=stress, strength=strength)
    '''

    supported_types = [
        Weibull_Distribution, Normal_Distribution, Lognormal_Distribution,
        Exponential_Distribution, Gamma_Distribution, Beta_Distribution,
        Loglogistic_Distribution, Gumbel_Distribution
    ]
    if type(stress) not in supported_types or type(
            strength) not in supported_types:
        raise ValueError(
            'Stress and Strength must both be probability distributions. First define the distribution using reliability.Distributions.___'
        )
    if type(stress) == Normal_Distribution and type(
            strength
    ) == Normal_Distribution and warn is True:  # supress the warning by setting warn=False
        colorprint(
            'WARNING: If strength and stress are both Normal distributions, it is more accurate to use the exact formula. The exact formula is supported in the function Probability_of_failure_normdist. To supress this warning set warn=False',
            text_color='red')
    if stress.mean > strength.mean and warn == True:
        colorprint(
            'WARNING: The mean of the stress distribution is above the mean of the strength distribution. Please check you have assigned stress and strength to the correct variables. To supress this warning set warn=False',
            text_color='red')

    # P(failure) is the integral of stress.PDF(x) * strength.CDF(x) over the
    # range where the stress PDF is non-negligible. The upper limit must
    # therefore reach the top of the stress distribution. Stopping at the top
    # of the strength distribution (as was done previously) drops stress
    # probability mass whenever stress extends above strength. Taking the
    # larger of the two upper quantiles leaves the usual case unchanged.
    lower_limit = stress.quantile(1e-8)
    upper_limit = max(stress.quantile(1 - 1e-8), strength.quantile(1 - 1e-8))
    x = np.linspace(lower_limit, upper_limit, 1000)
    prob_of_failure = np.trapz(
        stress.PDF(x, show_plot=False) * strength.CDF(x, show_plot=False), x)

    if show_distribution_plot is True:
        xlims = plt.xlim(
            auto=None
        )  # limits of the current axes, used further down to decide how to set xlim
        xmin = stress.quantile(0.00001)
        xmax = strength.quantile(0.99999)
        if xmin < (xmax - xmin) / 4:
            xmin = 0  # if the lower bound on xmin is near zero (relative to the entire range) then just make it zero
        if type(stress) == Beta_Distribution:
            xmin = 0
        if type(strength) == Beta_Distribution:
            xmax = 1
        xvals = np.linspace(xmin, xmax, 10000)
        stress_PDF = stress.PDF(xvals=xvals, show_plot=False)
        strength_PDF = strength.PDF(xvals=xvals, show_plot=False)
        # the lower of the two curves is the upper boundary of the overlap region
        Y = [
            min(strength_value, stress_value)
            for strength_value, stress_value in zip(strength_PDF, stress_PDF)
        ]
        plt.plot(xvals, stress_PDF, label='Stress')
        plt.plot(xvals, strength_PDF, label='Strength')
        # index of the peak of the overlap region; the two shaded bands below are split here
        intercept_idx = Y.index(max(Y))
        plt.fill_between(xvals,
                         np.zeros_like(xvals),
                         Y,
                         color='salmon',
                         alpha=1,
                         linewidth=0,
                         linestyle='--')
        plt.fill_between(xvals[0:intercept_idx],
                         strength_PDF[0:intercept_idx],
                         stress_PDF[0:intercept_idx],
                         color='steelblue',
                         alpha=0.3,
                         linewidth=0,
                         linestyle='--')
        plt.fill_between(xvals[intercept_idx:],
                         stress_PDF[intercept_idx:],
                         strength_PDF[intercept_idx:],
                         color='darkorange',
                         alpha=0.3,
                         linewidth=0,
                         linestyle='--')
        failure_text = str('Probability of\nfailure = ' +
                           str(round_to_decimals(prob_of_failure, 4)))
        plt.legend(title=failure_text)
        plt.title('Stress - Strength Interference Plot')
        plt.ylabel('Probability Density')
        plt.xlabel('Stress and Strength Units')
        plt.subplots_adjust(left=0.16)
        if xlims != (0, 1):
            # limits differ from (0, 1): widen them instead of replacing them
            plt.xlim(min(stress.b5, xlims[0]),
                     max(strength.b95, xlims[1]),
                     auto=None)
        else:
            plt.xlim(stress.b5, strength.b95, auto=None)
        plt.ylim(bottom=0, auto=None)

    if print_results is True:
        print('Probability of failure:', prob_of_failure)

    return prob_of_failure
示例#24
0
    def __init__(self,
                 times_between_failures=None,
                 failure_times=None,
                 CI=0.95,
                 test_end=None,
                 show_plot=True,
                 print_results=True,
                 **kwargs):
        """
        Rate of occurrence of failures (ROCOF) analysis using the Laplace trend test.

        Inputs:
        times_between_failures - list or array of failure interarrival times. All values must be greater than zero.
        failure_times - list or array of the actual failure times. All values must be greater than zero.
            Specify exactly one of times_between_failures and failure_times.
        CI - confidence level, strictly between 0 and 1. Default is 0.95
        test_end - float or int. The end time of a test which was not failure terminated.
        show_plot - True/False. If True the interarrival times and the MTBF are plotted against failure number.
        print_results - True/False. If True a summary of the test is printed.
        kwargs - "linestyle" styles the MTBF line and "label" labels the scatter points. Remaining kwargs go to plt.scatter.

        Outputs (attributes):
        U - the Laplace test statistic
        z_crit - tuple of (lower, upper) critical values
        trend - "improving", "worsening" or "constant"
        Beta_hat, Lambda_hat - power-law parameters when the trend is not constant, otherwise an explanatory string
        ROCOF - the constant rate when the trend is constant, otherwise an explanatory string

        Raises:
        ValueError - if both or neither of the time inputs are given, if the inputs are of the wrong type or
            contain non-positive values, if CI or test_end is invalid, or if there are too few failures to run the test.
        """
        if times_between_failures is not None and failure_times is not None:
            raise ValueError(
                "You have specified both times_between_failures and failure times. You can specify one but not both. Use times_between_failures for failure interarrival times, and failure_times for the actual failure times. failure_times should be the same as np.cumsum(times_between_failures)"
            )
        if times_between_failures is None and failure_times is None:
            # previously this fell through and failed later with an UnboundLocalError
            raise ValueError(
                "You must specify either times_between_failures or failure_times."
            )
        if times_between_failures is not None:
            if any(t <= 0 for t in times_between_failures):
                raise ValueError(
                    "times_between_failures cannot be less than zero")
            if type(times_between_failures) == list:
                ti = times_between_failures
            elif type(times_between_failures) == np.ndarray:
                ti = list(times_between_failures)
            else:
                raise ValueError(
                    "times_between_failures must be a list or array")
        else:
            if any(t <= 0 for t in failure_times):
                raise ValueError("failure_times cannot be less than zero")
            if type(failure_times) == list:
                failure_times = np.sort(np.array(failure_times))
            elif type(failure_times) == np.ndarray:
                failure_times = np.sort(failure_times)
            else:
                raise ValueError("failure_times must be a list or array")
            # np.sort returned a copy, so this in-place differencing does not touch the caller's data
            failure_times[1:] -= failure_times[:-1].copy(
            )  # this is the opposite of np.cumsum
            ti = list(failure_times)
        if test_end is not None and type(test_end) not in [float, int]:
            raise ValueError(
                "test_end should be a float or int. Use test_end to specify the end time of a test which was not failure terminated."
            )
        if CI <= 0 or CI >= 1:
            raise ValueError(
                "CI must be between 0 and 1. Default is 0.95 for 95% confidence interval."
            )
        if test_end is None:
            # failure terminated: the last failure marks the end of the test and is excluded from the statistic
            tn = sum(ti)
            n = len(ti) - 1
        else:
            tn = test_end
            n = len(ti)
            if tn < sum(ti):
                raise ValueError(
                    "test_end cannot be less than the final test time")
        if n < 1:
            # the statistic divides by n, so guard against too little data
            raise ValueError(
                "Not enough failures to perform the Laplace test. At least 2 failures are required, or at least 1 failure if test_end is specified."
            )

        if "linestyle" in kwargs:
            ls = kwargs.pop("linestyle")
        else:
            ls = "--"
        if "label" in kwargs:
            label_1 = kwargs.pop("label")
        else:
            label_1 = "Failure interarrival times"

        tc = np.cumsum(ti[0:n])  # cumulative failure times used in the statistic
        sum_tc = sum(tc)
        z_crit = ss.norm.ppf((1 - CI) / 2)  # z statistic based on CI (negative value)
        U = (sum_tc / n - tn / 2) / (tn * (1 / (12 * n))**0.5)
        self.U = U
        self.z_crit = (z_crit, -z_crit)
        results_str = str("Laplace test results: U = " + str(round(U, 3)) +
                          ", z_crit = (" + str(round(z_crit, 2)) + ",+" +
                          str(round(-z_crit, 2)) + ")")

        # U below the lower critical value means improving, above the upper means worsening
        if U < z_crit:
            trend = "improving"
        elif U > -z_crit:
            trend = "worsening"
        else:
            trend = "constant"
        self.trend = trend

        x = np.arange(1, len(ti) + 1)  # failure numbers
        if trend == "constant":
            rocof = (n + 1) / sum(ti)
            self.ROCOF = rocof
            self.Beta_hat = "not calculated when trend is constant"
            self.Lambda_hat = "not calculated when trend is constant"
            x_to_plot = x
            MTBF = np.ones_like(x_to_plot) / rocof
        else:
            # power-law parameters; the ROCOF then varies with time
            B = len(ti) / (sum(np.log(tn / np.array(tc))))
            L = len(ti) / (tn**B)
            self.Beta_hat = B
            self.Lambda_hat = L
            self.ROCOF = "ROCOF is not provided when trend is not constant. Use Beta_hat and Lambda_hat to calculate ROCOF at a given time t."
            _rocof = L * B * tc**(B - 1)
            MTBF = np.ones_like(tc) / _rocof
            # tc has n entries, so drop the last failure number when the test was failure terminated
            if test_end is not None:
                x_to_plot = x
            else:
                x_to_plot = x[:-1]

        CI_rounded = CI * 100
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI * 100)

        if print_results is True:
            colorprint("Results from ROCOF analysis:",
                       bold=True,
                       underline=True)
            print(results_str)
            if trend == "constant":
                print(
                    str("At " + str(CI_rounded) +
                        "% confidence level the ROCOF is CONSTANT. Assume HPP."
                        ))
                print(
                    "ROCOF assuming HPP is",
                    round_to_decimals(rocof, 4),
                    "failures per unit time.",
                )
            else:
                print(
                    str("At " + str(CI_rounded) +
                        "% confidence level the ROCOF is " + trend.upper() +
                        ". Assume NHPP."))
                print(
                    "ROCOF assuming NHPP has parameters: Beta_hat =",
                    round_to_decimals(B, 3),
                    ", Lambda_hat =",
                    round_to_decimals(L, 4),
                )

        if show_plot is True:
            plt.plot(x_to_plot, MTBF, linestyle=ls, label="MTBF")
            plt.scatter(x, ti, label=label_1, **kwargs)
            plt.ylabel("Times between failures")
            plt.xlabel("Failure number")
            title_str = str(
                "Failure interarrival times vs failure number\nAt " +
                str(CI_rounded) + "% confidence level the ROCOF is " +
                self.trend.upper())
            plt.title(title_str)
            plt.legend()
示例#25
0
def one_sample_proportion(trials=None,
                          successes=None,
                          CI=0.95,
                          print_results=True):
    '''
    Calculates the upper and lower bounds of reliability for a given number of trials and successes.

    The bounds are the exact binomial (Clopper-Pearson) limits evaluated through
    quantiles of the F-distribution. When successes is 0 or equal to trials only
    one bound is meaningful, so the full (1 - CI) tail is placed on that side
    (one sided bound) and the other bound is reported as 0 or 1.

    inputs:
    trials - the number of trials which were conducted
    successes - the number of trials which were successful
    CI - the desired confidence interval. Must be between 0 and 1. Defaults to 0.95 for 95% CI.
    print_results - if True the results will be printed to the console.

    returns: lower, upper - Confidence interval limits.
        lower is the int 0 and upper is the int 1 whenever that bound cannot be
        computed (ie. when successes=0 or successes=trials respectively).

    raises:
    ValueError - if trials or successes is missing, if either is negative,
        if successes exceeds trials, or if CI is outside the range 0 to 1.
    '''
    if trials is None or successes is None:
        raise ValueError(
            'You must specify the number of trials and successes.')
    if trials < 0 or successes < 0:
        raise ValueError('trials and successes cannot be negative')
    if successes > trials:
        raise ValueError('successes cannot be greater than trials')
    # Without this check an out-of-range CI produces probabilities outside
    # [0, 1] for the F quantile, which turn into nan and are then silently
    # reported as the bounds 0 and 1.
    if CI < 0 or CI > 1:
        raise ValueError(
            'CI must be between 0 and 1. Default is 0.95 for 95% confidence intervals.'
        )

    if successes == 0 or successes == trials:  # calculate 1 sided CI in these cases
        n = 1
    else:
        n = 2

    # lower bound
    V1_lower = 2 * successes
    V2_lower = 2 * (trials - successes + 1)
    alpha_lower = (1 - CI) / n
    F_lower = ss.f.ppf(alpha_lower, V1_lower, V2_lower)
    LOWER_LIM = (V1_lower * F_lower) / (V2_lower + V1_lower * F_lower)
    # zero degrees of freedom (successes=0) gives nan, for which the bound is 0
    LOWER_LIM = np.nan_to_num(LOWER_LIM, nan=0)
    if LOWER_LIM == 0:
        LOWER_LIM = int(0)

    # upper bound
    V1_upper = 2 * (successes + 1)
    V2_upper = 2 * (trials - successes)
    alpha_upper = 1 - alpha_lower
    F_upper = ss.f.ppf(alpha_upper, V1_upper, V2_upper)
    UPPER_LIM = (V1_upper * F_upper) / (V2_upper + V1_upper * F_upper)
    # zero degrees of freedom (successes=trials) gives nan, for which the bound is 1
    UPPER_LIM = np.nan_to_num(UPPER_LIM, nan=1)
    if UPPER_LIM == 1:
        UPPER_LIM = int(1)

    # show whole number percentages without a trailing ".0"
    CI_rounded = CI * 100
    if CI_rounded % 1 == 0:
        CI_rounded = int(CI_rounded)

    if print_results is True:
        colorprint('Results from one_sample_proportion:',
                   bold=True,
                   underline=True)
        print('For a test with', trials, 'trials of which there were',
              successes, 'successes and', trials - successes,
              'failures, the bounds on reliability are:')
        print('Lower', str(CI_rounded) + '%', 'confidence bound:',
              LOWER_LIM)
        print('Upper', str(CI_rounded) + '%', 'confidence bound:',
              UPPER_LIM)

    # a bound that cannot be computed (one sided cases) is returned as 0 (lower) or 1 (upper), never nan
    return LOWER_LIM, UPPER_LIM
示例#26
0
    def __init__(self,
                 failures=None,
                 right_censored=None,
                 print_results=True,
                 a=None,
                 show_plot=True,
                 plot_CI=True,
                 CI=0.95,
                 plot_type="SF",
                 **kwargs):
        """
        Builds the non-parametric Rank Adjustment estimate of the SF, CDF and
        CHF from failure and right censored times, with confidence bounds
        from the Greenwood formula (Normal approximation).

        inputs:
        failures - failure times. At least 2 are required.
        right_censored - right censored times. Optional.
        print_results - if True the results dataframe is printed to the console.
        a - forwarded unchanged to plotting_positions (presumably the
            plotting position heuristic constant - confirm against that function).
        show_plot - if True the estimate is drawn on the current matplotlib axes.
        plot_CI - if True the confidence bounds are shaded on the plot.
        CI - confidence level between 0 and 1. Default is 0.95.
        plot_type - "SF", "CDF" or "CHF" (upper or lower case). Default is "SF".
        kwargs - forwarded to plt.plot.

        attributes set:
        RA - one survival estimate per datapoint (failure or right censored).
        xvals - x values of the stepwise curve.
        SF, SF_lower, SF_upper - stepwise survival function and its bounds.
        CDF, CDF_lower, CDF_upper - stepwise cumulative distribution and its bounds.
        CHF, CHF_lower, CHF_upper - stepwise cumulative hazard and its bounds.
        results - pandas dataframe with one row per datapoint.

        raises:
        ValueError - if failures is None or has fewer than 2 values, if
            plot_type is not recognised, or if CI is outside 0 to 1.
        """

        if failures is None:
            raise ValueError(
                "failures must be provided to calculate non-parametric estimates."
            )
        if right_censored is None:
            right_censored = [
            ]  # create empty array so it can be added in hstack
        if plot_type not in ["CDF", "SF", "CHF", "cdf", "sf", "chf"]:
            raise ValueError(
                "plot_type must be CDF, SF, or CHF. Default is SF.")
        if CI < 0 or CI > 1:
            raise ValueError(
                "CI must be between 0 and 1. Default is 0.95 for 95% confidence intervals."
            )
        if len(failures) < 2:
            raise ValueError(
                str("failures has a length of " + str(len(failures)) +
                    ". The minimum acceptable number of failures is 2"))

        # turn the failures and right censored times into two arrays: times and censoring codes
        times = np.hstack([failures, right_censored])
        F = np.ones_like(failures)  # failures are given the code of 1
        RC = np.zeros_like(
            right_censored)  # censored values are given the code of 0
        cens_code = np.hstack([F, RC])
        Data = {"times": times, "cens_code": cens_code}
        df = pd.DataFrame(Data, columns=["times", "cens_code"])
        df2 = df.sort_values(by="times")  # sort by time, keeping each code with its time
        d = df2["times"].values
        c = df2["cens_code"].values
        n = len(d)  # number of items
        failures_array = np.arange(1,
                                   n + 1)  # array of number of items (1 to n)
        remaining_array = failures_array[::-1]  # items remaining (n to 1)

        # obtain the rank adjustment estimates
        from reliability.Probability_plotting import (
            plotting_positions,
        )  # can't have this at the start of the function because of circular import

        x, y = plotting_positions(failures=failures,
                                  right_censored=right_censored,
                                  a=a)
        # create the stepwise plot using the plotting positions
        # each failure contributes two points at the same x: the previous level and the new level
        x_array = [0]
        y_array = [0]
        for i in range(len(x)):
            x_array.extend([x[i], x[i]])
            if i == 0:
                y_array.extend([0, y[i]])
            else:
                y_array.extend([y[i - 1], y[i]])
        if c[-1] == 0:  # repeat the last value if censored
            x_array.append(d[-1])
            y_array.append(y_array[-1])

        # convert the plotting positions (which are only for the failures) into the full Rank Adjustment column by adding the values for the censored data
        RA = []
        y_extended = [0]
        y_extended.extend(
            y
        )  # need to add 0 to the start of the plotting positions since the CDF always starts at 0
        failure_counter = 0
        RA_upper = []  # upper CI
        RA_lower = []  # lower CI
        z = ss.norm.ppf(1 - (1 - CI) / 2)  # two sided z value for the requested CI
        frac = []
        delta = 0  # half width of the CI. Carried over unchanged for censored items
        for i in failures_array:  # failures array is 1 to n
            cens = c[i - 1]
            if cens == 1:  # censored values = 0. failures = 1
                failure_counter += 1
            RA.append(
                1 - y_extended[failure_counter]
            )  # RA is equivalent to the Survival function but not the stepwise version of the data. Just 1 point for each failure or right censored datapoint

            # greenwood confidence interval calculations. Uses Normal approximation (same method as Minitab uses for Kaplan-Meier)
            if c[i - 1] == 1:
                risk_set = n - i + 1
                frac.append(1 / ((risk_set) * (risk_set - 1)))  # risk_set - 1 is zero when the final item is a failure
                sumfrac = sum(frac)
                R2 = RA[i - 1]**2
                if R2 > 0:  # required if the last piece of data is a failure
                    delta = ((sumfrac * R2)**0.5) * z
                else:
                    delta = 0
            RA_upper.append(RA[i - 1] + delta)
            RA_lower.append(RA[i - 1] - delta)
        RA_lower = np.array(RA_lower)
        RA_upper = np.array(RA_upper)
        # clip the bounds to the valid range of a probability
        RA_upper[RA_upper > 1] = 1
        RA_lower[RA_lower < 0] = 0

        # create the stepwise plot for the confidence intervals.
        # first we downsample the RA_lower and RA_upper. This converts the RA_upper and RA_lower to arrays containing only the values where there are failures
        RA_lower_downsample = [1]  # reliability starts at 1
        RA_upper_downsample = [1]
        for i in range(len(RA)):
            if c[i] != 0:  # this means the current item is a failure
                RA_lower_downsample.append(RA_lower[i])
                RA_upper_downsample.append(RA_upper[i])
        # then we upsample by converting to stepwise plot. Essentially this is just repeating each value twice in the downsampled arrays
        RA_y_lower = []
        RA_y_upper = []
        for i in range(len(RA_lower_downsample)):
            RA_y_lower.extend([RA_lower_downsample[i], RA_lower_downsample[i]])
            RA_y_upper.extend([RA_upper_downsample[i], RA_upper_downsample[i]])
        if (
                c[-1] == 1
        ):  # if the last value is a failure we need to remove the last element as the plot ends in a vertical line not a horizontal line
            RA_y_lower = RA_y_lower[0:-1]
            RA_y_upper = RA_y_upper[0:-1]

        self.RA = RA  # these are the values from the dataframe. 1 value for each time (failure or right censored). RA is for "rank adjustment" just as KM is "Kaplan-Meier"
        self.xvals = x_array
        self.SF = 1 - np.array(
            y_array)  # these are the stepwise values for the plot.
        self.SF_lower = np.array(RA_y_lower)
        self.SF_upper = np.array(RA_y_upper)
        self.CDF = np.array(y_array)
        # the CDF and CHF bounds swap sides relative to the SF bounds
        self.CDF_lower = 1 - self.SF_upper
        self.CDF_upper = 1 - self.SF_lower
        self.CHF = -np.log(self.SF)
        self.CHF_lower = -np.log(self.SF_upper)
        self.CHF_upper = -np.log(self.SF_lower)  # this will be inf when SF=0

        # assemble the pandas dataframe for the output
        DATA = {
            "Failure times": d,
            "Censoring code (censored=0)": c,
            "Items remaining": remaining_array,
            "Rank Adjustment Estimate": self.RA,
            "Lower CI bound": RA_lower,
            "Upper CI bound": RA_upper,
        }
        self.results = pd.DataFrame(
            DATA,
            columns=[
                "Failure times",
                "Censoring code (censored=0)",
                "Items remaining",
                "Rank Adjustment Estimate",
                "Lower CI bound",
                "Upper CI bound",
            ],
        )

        # show whole number percentages without a trailing ".0"
        CI_rounded = CI * 100
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI * 100)

        if print_results is True:
            colorprint(
                str("Results from RankAdjustment (" + str(CI_rounded) +
                    "% CI):"),
                bold=True,
                underline=True,
            )
            print(self.results.to_string(index=False), "\n")
        if show_plot is True:
            xlim_upper = plt.xlim(auto=None)[1]  # get the existing upper xlim so other plots are considered when setting the limits
            xmax = max(d)
            if plot_type in ["SF", "sf"]:
                p = plt.plot(self.xvals, self.SF, **kwargs)
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Rank-Adjustment SF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.SF_lower,
                        self.SF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = "Rank Adjustment estimate of Survival Function"
                plt.xlabel("Failure units")
                plt.ylabel("Reliability")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim([0, 1.1])
            elif plot_type in ["CDF", "cdf"]:
                p = plt.plot(self.xvals, self.CDF, **kwargs)
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Rank Adjustment CDF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.CDF_lower,
                        self.CDF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = (
                        "Rank Adjustment estimate of Cumulative Density Function"
                    )
                plt.xlabel("Failure units")
                plt.ylabel("Fraction Failing")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim([0, 1.1])
            elif plot_type in ["CHF", "chf"]:
                ylims = plt.ylim(
                    auto=None
                )  # get the existing ylims so other plots are considered when setting the limits
                p = plt.plot(self.xvals, self.CHF, **kwargs)
                CHF_upper = np.nan_to_num(self.CHF_upper, posinf=1e10)  # replace inf with a large finite number so the bounds can be shaded
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Rank Adjustment CHF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.CHF_lower,
                        CHF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = (
                        "Rank Adjustment estimate of Cumulative Hazard Function"
                    )
                plt.xlabel("Failure units")
                plt.ylabel("Cumulative Hazard")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim(
                    [0, max(ylims[1], self.CHF[-2] * 1.2)]
                )  # set the limits for y. Need to do this because the upper CI bound is inf.
            else:
                # not reachable in practice because plot_type is validated at the top of this method
                raise ValueError("plot_type must be CDF, SF, CHF")
示例#27
0
def reliability_test_duration(MTBF_required,
                              MTBF_design,
                              consumer_risk,
                              producer_risk,
                              one_sided=True,
                              time_terminated=True,
                              show_plot=True,
                              print_results=True):
    '''
    reliability_test_duration

    Solves for the duration of a reliability test that meets the specified
    consumer's risk while keeping the producer's risk at the specified level.
    The number of failures is stepped up from 0. For each step the test
    duration implied by MTBF_required and the consumer's risk is found, and
    then the producer's risk implied by MTBF_design and that duration.

    Inputs:
    MTBF_required - the required MTBF that the equipment must demonstrate during the test
    MTBF_design - the design target for the MTBF that the producer aims to achieve. Must exceed MTBF_required.
    consumer_risk - the probability that a bad product is accepted by the consumer. Must be above 0 and no more than 0.5.
    producer_risk - the probability that a good product is rejected by the consumer. Must be above 0 and no more than 0.5.
    one_sided - default is True. Whether the risk is treated as a one sided or two sided confidence interval.
    time_terminated - default is True. Whether the test is time terminated or failure terminated.
    show_plot - True/False. Default is True. Plots the risk vs test duration. Use plt.show() to show it.
    print_results - True/False. Default is True. Prints the results to the console.

    Returns:
    test_duration

    Raises:
    ValueError - if any of the inputs fails the checks described above.
    '''

    # validate the numeric inputs
    if consumer_risk <= 0 or consumer_risk > 0.5:
        raise ValueError('consumer_risk must be between 0 and 0.5')
    if producer_risk <= 0 or producer_risk > 0.5:
        raise ValueError('producer_risk must be between 0 and 0.5')
    if MTBF_design <= MTBF_required:
        raise ValueError('MTBF_design must exceed MTBF_required')

    # validate the True/False inputs, in the order they appear in the signature
    flag_checks = (
        (one_sided, 'one_sided must be True or False. Default is True'),
        (time_terminated,
         'time_terminated must be True or False. Default is True'),
        (show_plot, 'show_plot must be True or False. Default is True'),
        (print_results,
         'print_results must be True or False. Default is True'),
    )
    for flag_value, flag_message in flag_checks:
        if flag_value not in [True, False]:
            raise ValueError(flag_message)

    durations = []  # test duration for each number of failures tried
    producer_risks = []  # producer's risk for each number of failures tried
    n_failures = 0  # number of failures tried in the current iteration
    best_index = False  # placeholder until the solution is located
    failure_limit = 1e10  # placeholder until the solution is located
    solution_located = False
    started_at = time.time()
    warn_after = 10  # seconds of runtime before the next slow-solve warning
    while True:
        # test duration from the required MTBF and the consumer's risk
        required_plan = reliability_test_planner(
            number_of_failures=n_failures,
            CI=1 - consumer_risk,
            MTBF=MTBF_required,
            one_sided=one_sided,
            time_terminated=time_terminated,
            print_results=False)
        # producer's risk from that test duration and the design MTBF
        design_plan = reliability_test_planner(
            MTBF=MTBF_design,
            test_duration=required_plan.test_duration,
            number_of_failures=n_failures,
            one_sided=one_sided,
            time_terminated=time_terminated,
            print_results=False)
        durations.append(required_plan.test_duration)
        producer_risks.append(design_plan.CI)

        # the first time the producer's risk drops below the target, step
        # back one failure for the solution and keep iterating a little
        # further so the plot extends beyond the solution
        if solution_located is False and producer_risks[-1] < producer_risk:
            best_index = n_failures - 1
            failure_limit = best_index * 1.5
            solution_located = True
        if n_failures > failure_limit:
            break
        n_failures += 1

        # warn every 10 seconds of runtime
        if time.time() - started_at > warn_after:
            runtime_text = str(int(round(time.time() - started_at, 0)))
            colorprint(
                'WARNING: The algorithm is taking a long time to find the solution. This is probably because MTBF_required is too close to MTBF_design so the item struggles to pass the test. --- Current runtime: '
                + runtime_text + ' seconds',
                text_color='red')
            warn_after += 10

    duration_solution = durations[best_index]

    if print_results is True:
        if time_terminated is True:
            heading = '\nReliability Test Duration Solver for time-terminated test'
        else:
            heading = '\nReliability Test Duration Solver for failure-terminated test'
        print(heading)
        print('Required test duration:', duration_solution)
        print("Specified consumer's risk:", consumer_risk)
        print("Specified producer's risk:", producer_risk)
        print('Specified MTBF required by the consumer:', MTBF_required)
        print('Specified MTBF designed to by the producer:', MTBF_design)

    if show_plot is True:
        # the consumer's risk is constant so it plots as a horizontal line
        consumer_risks = np.ones_like(durations) * consumer_risk
        plt.plot(durations, producer_risks, label="Producer's risk")
        plt.plot(durations, consumer_risks, label="Consumer's risk")
        plt.scatter(durations,
                    producer_risks,
                    color='k',
                    marker='.',
                    label='Failure events')

        plt.xlabel('Test duration')
        plt.ylabel('Risk')
        plt.legend(loc='upper right')
        if len(durations) > 1:
            plt.xlim(min(durations), max(durations))
        # mark the solution with a dashed vertical line and a label
        plt.axvline(x=duration_solution,
                    color='k',
                    linestyle='--',
                    linewidth=1)
        plt.title("Test duration vs Producer's and Consumer's Risk")
        annotation = ' Test duration\n ' + str(
            int(math.ceil(duration_solution)))
        plt.text(x=duration_solution,
                 y=plt.ylim()[0],
                 s=annotation,
                 va='bottom')
    return duration_solution
示例#28
0
    def __init__(self,
                 failures=None,
                 right_censored=None,
                 show_plot=True,
                 print_results=True,
                 plot_CI=True,
                 CI=0.95,
                 plot_type="SF",
                 **kwargs):
        """
        Builds the non-parametric Kaplan-Meier estimate of the SF, CDF and
        CHF from failure and right censored times, with confidence bounds
        from the Greenwood formula (Normal approximation).

        inputs:
        failures - failure times. At least 2 are required.
        right_censored - right censored times. Optional.
        show_plot - if True the estimate is drawn on the current matplotlib axes.
        print_results - if True the results dataframe is printed to the console.
        plot_CI - if True the confidence bounds are shaded on the plot.
        CI - confidence level between 0 and 1. Default is 0.95.
        plot_type - "SF", "CDF" or "CHF" (upper or lower case). Default is "SF".
        kwargs - forwarded to plt.plot.

        attributes set:
        data - the sorted times (failures and right censored combined).
        censor_codes - censoring code for each sorted time (failure=1, censored=0).
        KM - one survival estimate per datapoint.
        xvals - x values of the stepwise curve.
        SF, SF_lower, SF_upper - stepwise survival function and its bounds.
        CDF, CDF_lower, CDF_upper - stepwise cumulative distribution and its bounds.
        CHF, CHF_lower, CHF_upper - stepwise cumulative hazard and its bounds.
        results - pandas dataframe with one row per datapoint.

        raises:
        ValueError - if failures is None or has fewer than 2 values, if
            plot_type is not recognised, or if CI is outside 0 to 1.

        NOTE: np.seterr is called here and is not restored in this method, so
        NumPy divide warnings remain suppressed after construction.
        """
        np.seterr(
            divide="ignore"
        )  # divide by zero occurs if last datapoint is a failure so risk set is zero

        if failures is None:
            raise ValueError(
                "failures must be provided to calculate non-parametric estimates."
            )
        if right_censored is None:
            right_censored = [
            ]  # create empty array so it can be added in hstack
        if plot_type not in ["CDF", "SF", "CHF", "cdf", "sf", "chf"]:
            raise ValueError(
                "plot_type must be CDF, SF, or CHF. Default is SF.")
        if CI < 0 or CI > 1:
            raise ValueError(
                "CI must be between 0 and 1. Default is 0.95 for 95% confidence intervals."
            )
        if len(failures) < 2:
            raise ValueError(
                str("failures has a length of " + str(len(failures)) +
                    ". The minimum acceptable number of failures is 2"))

        # turn the failures and right censored times into two arrays: times and censoring codes
        times = np.hstack([failures, right_censored])
        F = np.ones_like(failures)  # failures are given the code of 1
        RC = np.zeros_like(
            right_censored)  # censored values are given the code of 0
        cens_code = np.hstack([F, RC])
        Data = {"times": times, "cens_code": cens_code}
        df = pd.DataFrame(Data, columns=["times", "cens_code"])
        df2 = df.sort_values(by="times")  # sort by time, keeping each code with its time
        d = df2["times"].values
        c = df2["cens_code"].values

        self.data = d
        self.censor_codes = c

        n = len(d)  # number of items
        failures_array = np.arange(1,
                                   n + 1)  # array of number of items (1 to n)
        remaining_array = failures_array[::-1]  # items remaining (n to 1)
        KM = []  # Survival function
        KM_upper = []  # upper CI
        KM_lower = []  # lower CI
        z = ss.norm.ppf(1 - (1 - CI) / 2)  # two sided z value for the requested CI
        frac = []
        delta = 0  # half width of the CI. Carried over unchanged for censored items
        for i in failures_array:
            # product-limit step: multiply the previous estimate by (remaining - code) / remaining.
            # a censored item (code 0) leaves the estimate unchanged
            if i == 1:
                KM.append((remaining_array[i - 1] - c[i - 1]) /
                          remaining_array[i - 1])
            else:
                KM.append(((remaining_array[i - 1] - c[i - 1]) /
                           remaining_array[i - 1]) * KM[i - 2])
            # greenwood confidence interval calculations. Uses Normal approximation (same method as in Minitab)
            if c[i - 1] == 1:
                risk_set = n - i + 1
                frac.append(1 / ((risk_set) * (risk_set - 1)))  # risk_set - 1 is zero when the final item is a failure
                sumfrac = sum(frac)
                R2 = KM[i - 1]**2
                if R2 > 0:  # required if the last piece of data is a failure
                    delta = ((sumfrac * R2)**0.5) * z
                else:
                    delta = 0
            KM_upper.append(KM[i - 1] + delta)
            KM_lower.append(KM[i - 1] - delta)
        KM_lower = np.array(KM_lower)
        KM_upper = np.array(KM_upper)
        # clip the bounds to the valid range of a probability
        KM_upper[KM_upper > 1] = 1
        KM_lower[KM_lower < 0] = 0

        # assemble the pandas dataframe for the output
        DATA = {
            "Failure times": d,
            "Censoring code (censored=0)": c,
            "Items remaining": remaining_array,
            "Kaplan-Meier Estimate": KM,
            "Lower CI bound": KM_lower,
            "Upper CI bound": KM_upper,
        }
        self.results = pd.DataFrame(
            DATA,
            columns=[
                "Failure times",
                "Censoring code (censored=0)",
                "Items remaining",
                "Kaplan-Meier Estimate",
                "Lower CI bound",
                "Upper CI bound",
            ],
        )
        self.KM = KM

        # build the stepwise arrays used for plotting
        KM_x = [0]
        KM_y = [1]  # adds a start point for 100% reliability at 0 time
        KM_y_upper = []
        KM_y_lower = []

        for i in failures_array:
            if i == 1:
                if c[i - 1] == 0:  # if the first item is censored
                    KM_x.append(d[i - 1])
                    KM_y.append(1)
                    KM_y_lower.append(1)
                    KM_y_upper.append(1)
                else:  # if the first item is a failure
                    # two points at the same x draw the vertical drop of the step
                    KM_x.append(d[i - 1])
                    KM_x.append(d[i - 1])
                    KM_y.append(1)
                    KM_y.append(KM[i - 1])
                    KM_y_lower.append(1)
                    KM_y_upper.append(1)
                    KM_y_lower.append(1)
                    KM_y_upper.append(1)
            else:
                if KM[i - 2] == KM[i - 1]:  # estimate unchanged from the previous item, which is treated as this item being censored
                    KM_x.append(d[i - 1])
                    KM_y.append(KM[i - 1])
                    KM_y_lower.append(KM_lower[i - 2])
                    KM_y_upper.append(KM_upper[i - 2])
                else:  # estimate changed, so this item is a failure
                    # two points at the same x draw the vertical drop of the step
                    KM_x.append(d[i - 1])
                    KM_y.append(KM[i - 2])
                    KM_y_lower.append(KM_lower[i - 2])
                    KM_y_upper.append(KM_upper[i - 2])
                    KM_x.append(d[i - 1])
                    KM_y.append(KM[i - 1])
                    KM_y_lower.append(KM_lower[i - 2])
                    KM_y_upper.append(KM_upper[i - 2])
        # the bound lists were started empty (one shorter than KM_y), so repeat the last value to match the length of xvals
        KM_y_lower.append(KM_y_lower[-1])
        KM_y_upper.append(KM_y_upper[-1])
        self.xvals = np.array(KM_x)
        self.SF = np.array(KM_y)
        self.SF_lower = np.array(KM_y_lower)
        self.SF_upper = np.array(KM_y_upper)
        self.CDF = 1 - self.SF
        # the CDF and CHF bounds swap sides relative to the SF bounds
        self.CDF_lower = 1 - self.SF_upper
        self.CDF_upper = 1 - self.SF_lower
        self.CHF = -np.log(self.SF)
        self.CHF_lower = -np.log(self.SF_upper)
        self.CHF_upper = -np.log(self.SF_lower)  # this will be inf when SF=0

        # show whole number percentages without a trailing ".0"
        CI_rounded = CI * 100
        if CI_rounded % 1 == 0:
            CI_rounded = int(CI * 100)

        if print_results is True:
            colorprint(
                str("Results from KaplanMeier (" + str(CI_rounded) + "% CI):"),
                bold=True,
                underline=True,
            )
            print(self.results.to_string(index=False), "\n")
        if show_plot is True:
            xlim_upper = plt.xlim(auto=None)[1]  # get the existing upper xlim so other plots are considered when setting the limits
            xmax = max(times)
            if plot_type in ["SF", "sf"]:
                p = plt.plot(self.xvals, self.SF, **kwargs)
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Kaplan-Meier SF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.SF_lower,
                        self.SF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = "Kaplan-Meier estimate of Survival Function"
                plt.xlabel("Failure units")
                plt.ylabel("Reliability")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim([0, 1.1])
            elif plot_type in ["CDF", "cdf"]:
                p = plt.plot(self.xvals, self.CDF, **kwargs)
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Kaplan-Meier CDF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.CDF_lower,
                        self.CDF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = "Kaplan-Meier estimate of Cumulative Density Function"
                plt.xlabel("Failure units")
                plt.ylabel("Fraction Failing")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim([0, 1.1])
            elif plot_type in ["CHF", "chf"]:
                ylims = plt.ylim(
                    auto=None
                )  # get the existing ylims so other plots are considered when setting the limits
                p = plt.plot(self.xvals, self.CHF, **kwargs)
                CHF_upper = np.nan_to_num(self.CHF_upper, posinf=1e10)  # replace inf with a large finite number so the bounds can be shaded
                if plot_CI is True:  # plots the confidence bounds
                    title_text = str("Kaplan-Meier CHF estimate\n with " +
                                     str(CI_rounded) + "% confidence bounds")
                    plt.fill_between(
                        self.xvals,
                        self.CHF_lower,
                        CHF_upper,
                        color=p[0].get_color(),
                        alpha=0.3,
                        linewidth=0,
                    )
                else:
                    title_text = "Kaplan-Meier estimate of Cumulative Hazard Function"
                plt.xlabel("Failure units")
                plt.ylabel("Cumulative Hazard")
                plt.title(title_text)
                plt.xlim([0, max(xmax, xlim_upper)])
                plt.ylim(
                    [0, max(ylims[1], self.CHF[-2] * 1.2)]
                )  # set the limits for y. Need to do this because the upper CI bound is inf.
            else:
                # not reachable in practice because plot_type is validated at the top of this method
                raise ValueError("plot_type must be CDF, SF, CHF")
示例#29
0
def two_proportion_test(sample_1_trials=None,
                        sample_1_successes=None,
                        sample_2_trials=None,
                        sample_2_successes=None,
                        CI=0.95,
                        print_results=True):
    '''
    Calculates whether the difference in test results between two samples is statistically significant. For example, assume we have
    a poll of respondents in which 27/40 people agreed, and another poll in which 42/80 agreed. This test will determine if the difference
    is statistically significant for the given sample sizes at the specified confidence level.

    The bounds are the normal-approximation interval on (p1 - p2):
    (p1 - p2) +/- Z * sqrt(p1*(1-p1)/n1 + p2*(1-p2)/n2)

    inputs:
    sample_1_trials - number of trials in the first sample. Must be greater than 0.
    sample_1_successes - number of successes in the first sample. Must be between 0 and sample_1_trials.
    sample_2_trials - number of trials in the second sample. Must be greater than 0.
    sample_2_successes - number of successes in the second sample. Must be between 0 and sample_2_trials.
    CI - desired confidence interval. Must satisfy 0.5 <= CI < 1. Defaults to 0.95 for 95% CI.
    print_results - if True the results will be printed to the console.

    returns:
    lower,upper,result - lower and upper are bounds on the difference. If the bounds do not include 0 then it is a statistically significant difference.
    result is the string 'significant' or 'non-significant'.

    raises:
    ValueError - if CI is out of range, if any sample value is missing, or if the trials/successes are not a valid combination.
    '''
    if CI < 0.5 or CI >= 1:
        raise ValueError('CI must be between 0.5 and 1. Default is 0.95')
    sample_values = (sample_1_trials, sample_1_successes,
                     sample_2_trials, sample_2_successes)
    if any(value is None for value in sample_values):
        raise ValueError(
            'You must specify the number of trials and successes for both samples.'
        )
    # Guard the divisions below: zero trials would otherwise surface as a bare ZeroDivisionError.
    if sample_1_trials <= 0 or sample_2_trials <= 0:
        raise ValueError('trials must be greater than 0')
    if sample_1_successes < 0 or sample_2_successes < 0:
        raise ValueError('successes cannot be negative')
    if sample_1_successes > sample_1_trials or sample_2_successes > sample_2_trials:
        raise ValueError('successes cannot be greater than trials')

    p1 = sample_1_successes / sample_1_trials
    p2 = sample_2_successes / sample_2_trials
    diff = p1 - p2
    Z = ss.norm.ppf(1 - ((1 - CI) / 2))  # two-sided critical value
    variance = (p1 * (1 - p1) / sample_1_trials) + (p2 * (1 - p2) / sample_2_trials)
    k = Z * variance**0.5
    lower = diff - k
    upper = diff + k

    # Inclusive comparison: an interval that touches 0 (e.g. 0/10 vs 0/10 gives [0, 0])
    # still contains 0 and must not be reported as a significant difference.
    if lower <= 0 <= upper:
        result = 'non-significant'
        contains_zero_string = 'contain 0'
    else:
        result = 'significant'
        contains_zero_string = 'do not contain 0'

    # Show whole-number percentages without a trailing ".0"
    CI_rounded = CI * 100
    if CI_rounded % 1 == 0:
        CI_rounded = int(CI_rounded)

    if print_results is True:
        colorprint('Results from two_proportion_test:',
                   bold=True,
                   underline=True)
        print('Sample 1 test results (successes/tests):',
              str(sample_1_successes) + '/' + str(sample_1_trials))
        print('Sample 2 test results (successes/tests):',
              str(sample_2_successes) + '/' + str(sample_2_trials))
        print('The', str(CI_rounded) + '%',
              'confidence bounds on the difference in these results is:',
              lower, 'to', upper)
        print('Since the confidence bounds', contains_zero_string,
              'the result is statistically', result + '.')

    return lower, upper, result
示例#30
0
    def __init__(self,
                 cost_PM,
                 cost_CM,
                 weibull_alpha,
                 weibull_beta,
                 show_plot=True,
                 print_results=True,
                 q=0,
                 **kwargs):
        '''
        Finds the replacement time that minimises the cost per unit time for an item whose life follows a Weibull distribution.

        inputs:
        cost_PM - cost of preventative maintenance. Must not exceed cost_CM.
        cost_CM - cost of corrective maintenance.
        weibull_alpha - scale parameter of the Weibull distribution
        weibull_beta - shape parameter of the Weibull distribution. A warning is printed if this is < 1.
        show_plot - if True the cost per unit time curve and its minimum are plotted. Default is True.
        print_results - if True the results are printed to the console. Default is True.
        q - restoration factor. Use 0 for "as good as new" (default) and 1 for "as good as old".
        kwargs - passed to plt.plot for the cost curve. 'color' is extracted and also used for the marker at the minimum.

        outputs (stored on the object):
        ORT - the optimal replacement time
        min_cost - the minimum cost per unit time

        raises:
        ValueError - if cost_PM > cost_CM or if q is not 0 or 1.
        '''
        c = kwargs.pop('color', 'steelblue')
        if cost_PM > cost_CM:
            raise ValueError(
                'Cost_PM must be less than Cost_CM otherwise preventative maintenance should not be conducted.'
            )
        if weibull_beta < 1:
            colorprint(
                'WARNING: weibull_beta is < 1 so the hazard rate is decreasing, therefore preventative maintenance should not be conducted.',
                text_color='red')

        if q == 1:  # as good as old
            alpha_multiple = 4  # just used for plot limits
            t = np.linspace(1, weibull_alpha * alpha_multiple, 100000)
            CPUT = ((cost_PM *
                     (t / weibull_alpha)**weibull_beta) + cost_CM) / t
            # Closed-form optimum.
            # NOTE(review): this expression is undefined for weibull_beta <= 1
            # (division by zero / root of a negative number) - confirm how callers should be told.
            ORT = weibull_alpha * ((cost_CM /
                                    (cost_PM *
                                     (weibull_beta - 1)))**(1 / weibull_beta))
            min_cost = ((cost_PM *
                         (ORT / weibull_alpha)**weibull_beta) + cost_CM) / ORT
        elif q == 0:  # as good as new
            alpha_multiple = 3
            t = np.linspace(1, weibull_alpha * alpha_multiple, 10000)

            def R(x):
                # Weibull survival function
                return np.exp(-((x / weibull_alpha)**weibull_beta))

            CPUT = []  # cost per unit time
            for T in t:
                SF = R(T)
                integral_R, _ = integrate.quad(R, 0, T)
                CPUT.append((cost_PM * SF + cost_CM * (1 - SF)) / integral_R)
            # Locate the minimum once, after the whole curve is known. Searching inside the
            # loop gave the same final answer but re-scanned the growing list on every pass.
            idx = np.argmin(CPUT)
            min_cost = CPUT[idx]  # minimum cost per unit time
            ORT = t[idx]  # optimal replacement time
        else:
            raise ValueError(
                'q must be 0 or 1. Default is 0. Use 0 for "as good as new" and use 1 for "as good as old".'
            )
        self.ORT = ORT
        self.min_cost = min_cost

        if min_cost < 1:
            min_cost_rounded = round(
                min_cost, -int(np.floor(np.log10(abs(min_cost)))) + 1
            )  # this rounds to exactly 2 sigfigs no matter the number of preceding zeros
        else:
            min_cost_rounded = round(min_cost, 2)
        ORT_rounded = round(ORT, 2)

        if print_results is True:
            if q == 0:
                print('Cost model assuming as good as new replacement (q=0):')
            else:
                print('Cost model assuming as good as old replacement (q=1):')
            print('The minimum cost per unit time is', min_cost_rounded,
                  '\nThe optimal replacement time is', ORT_rounded)

        if show_plot is True:
            plt.plot(t, CPUT, color=c, **kwargs)
            plt.plot(ORT, min_cost, 'o', color=c)  # marker at the optimum
            text_str = str('\nMinimum cost per unit time is ' +
                           str(min_cost_rounded) +
                           '\nOptimal replacement time is ' + str(ORT_rounded))
            plt.text(ORT, min_cost, text_str, verticalalignment='top')
            plt.xlabel('Replacement time')
            plt.ylabel('Cost per unit time')
            plt.title('Optimal replacement time estimation')
            plt.ylim([0, min_cost * 2])
            plt.xlim([0, weibull_alpha * alpha_multiple])