Exemplo n.º 1
0
def approximate_confidence_interval(x, confidence=0.95):
    """Return a normal-approximation confidence interval for the mean of x.

    The interval is ``mean - h, mean + h`` with ``h = sem(x) * z``, where
    ``z`` is the standard normal quantile at ``(1 + confidence) / 2``.

    Args:
        x: Sequence of numeric observations.
        confidence: Two-sided confidence level, expected in (0, 1).
            Defaults to 0.95, the level that used to be hard-coded.

    Returns:
        Tuple ``(lower, upper)`` with the bounds of the interval.
    """
    a = np.asarray(x, dtype=float)
    m, se = np.mean(a), sem(a)
    # Public ``ppf`` (rather than the private ``_ppf``) validates its
    # argument and is part of the documented distribution API.
    h = se * norm.ppf((1 + confidence) / 2)
    return m - h, m + h
def get_Z_alpha(confidence):
    """Return the two-sided standard normal critical value for a confidence level.

    Args:
        confidence: Two-sided confidence level, e.g. 0.95.

    Returns:
        The standard normal quantile at ``1 - (1 - confidence) / 2``
        (about 1.96 for ``confidence=0.95``).
    """
    upper_tail_quantile = 1 - (1 - confidence) / 2
    # Use the public, argument-validating ``ppf`` instead of the private
    # ``_ppf`` implementation hook.
    return norm.ppf(upper_tail_quantile)
Exemplo n.º 3
0
def make_plot_probit(title, input_data, x_label):
    '''Create a probit plot for EUR and other log-normally distributed data.

    A log-normal distribution (location fixed at zero) is fitted to
    ``input_data``. The observations are drawn at their empirical normal
    scores, the fitted distribution is drawn as a line, and the P90, P50 and
    P10 values read off the fitted CDF (at 0.1, 0.5 and 0.9 respectively) are
    marked with a point and a dashed vertical segment.

    Args:
        title: Plot title.
        input_data: Numeric observations; their logarithm is taken, so they
            are expected to be positive.
        x_label: Label for the x axis.

    Returns:
        The figure object produced by ``figure``.
    '''
    # Fit a log-normal distribution to the input data (location fixed at 0).
    sigma, _, scale = lognorm.fit(input_data, floc=0)
    mu = math.log(scale)
    x = np.linspace(0.001, np.max(input_data) + np.mean(input_data), 1000)
    log_x = np.log(x)
    cdf = (1 + scipy.special.erf((log_x - mu) / (np.sqrt(2) * sigma))) / 2

    # Normal score of the fitted line. Computed directly as a z-score instead
    # of norm.ppf(cdf): the two are mathematically identical, but the CDF
    # rounds to exactly 0 or 1 in the tails, where the quantile is +/-inf.
    fit_z = (log_x - mu) / sigma

    p = figure(title=title, background_fill_color="#fafafa", x_axis_type='log')

    # Rank every observation (0 = smallest) using the log of the data.
    log_data = np.log(input_data)
    order = np.argsort(log_data)
    ranks = np.empty_like(order)
    ranks[order] = np.arange(len(log_data))

    # Plotting position (rank + 1) / (n + 1): dividing by n + 1 keeps the
    # highest-ranked point strictly below 1, where the quantile is infinite.
    percentiles = (ranks + 1) / (len(order) + 1)
    data_z = norm.ppf(percentiles)

    # NOTE(review): the ``legend=`` keyword is kept as in the original; newer
    # Bokeh releases spell it ``legend_label=`` - confirm against the Bokeh
    # version this project pins.
    p.scatter(input_data,
              data_z,
              size=15,
              line_color="navy",
              legend="Input Data",
              marker='circle_cross')
    p.line(x, fit_z, line_width=3, line_color="red", legend="Best Fit")

    # P10 / P50 / P90 read off the fitted CDF. ``find_nearest`` is defined
    # elsewhere; element [1] of its result is used as an index into ``x``.
    p10 = round(x[find_nearest(cdf, 0.9)[1]])
    p50 = round(x[find_nearest(cdf, 0.5)[1]])
    p90 = round(x[find_nearest(cdf, 0.1)[1]])

    # (label, x value, z position, colour) for each percentile marker.
    markers = (
        ("P90", p90, norm.ppf(0.10), 'darkred'),
        ("P50", p50, norm.ppf(0.50), 'blue'),
        ("P10", p10, norm.ppf(0.90), 'red'),
    )

    # Point markers first, then the dashed segments, so glyphs are added in
    # the same order as before. Matching legend strings tie each segment to
    # its marker.
    for name, value, z, colour in markers:
        p.scatter(value,
                  z,
                  size=15,
                  line_color="black",
                  fill_color=colour,
                  legend=f"{name} = {int(value)}",
                  marker='square_x')

    z_top = np.max(fit_z)
    for name, value, _, colour in markers:
        p.segment(value,
                  -4,
                  value,
                  z_top,
                  line_dash='dashed',
                  line_width=2,
                  line_color=colour,
                  legend=f"{name} = {int(value)}")

    # Start the x axis at the power of ten at or below the smallest value.
    # floor (not int truncation) is required: for data below 1 the log is
    # negative and truncation would round up, hiding the smallest points.
    x_min = int(np.floor(np.log10(np.min(input_data))))
    power_of_10 = 10**(x_min)

    # Plot styling
    p.x_range.start = power_of_10
    p.y_range.start = -3
    p.legend.location = "top_left"
    p.legend.background_fill_color = "#fefefe"
    p.xaxis.axis_label = x_label
    p.yaxis.axis_label = 'Z'
    p.left[0].formatter.use_scientific = False
    p.xaxis[0].formatter = NumeralTickFormatter(format="0,0")
    p.yaxis.visible = False
    p.title.text = title
    p.title.align = 'center'
    p.legend.click_policy = "hide"

    return p
Exemplo n.º 4
0
 def _solve_power_for_pct(self, pct):
     z = norm._ppf(pct)
     error = lambda est: self._compute_stouffer_z_at_power(est) - z
     return opt.brentq(error, .0501, .9999)
Exemplo n.º 5
0
 def _compute_stouffer_z(pp):
     isnotnan = ~np.isnan(pp)
     pp_notnan = pp[isnotnan]
     return np.sum(norm._ppf(pp_notnan)) / np.sqrt(isnotnan.sum())
Exemplo n.º 6
0
def gaussian_icdf(h, Xi, x):
    """Return ``Xi + h**2 * z``, where ``z`` is the standard normal quantile of x.

    Args:
        h: Scale parameter; it enters the result squared.
        Xi: Offset added to the scaled quantile.
        x: Probability (or array of probabilities) to invert.

    Returns:
        ``Xi + h**2 * norm.ppf(x)``.
    """
    # NOTE(review): the quantile is scaled by h**2. If h is a standard
    # deviation / kernel bandwidth, the Gaussian inverse CDF would normally
    # scale by h - confirm the intended meaning of h with the callers.
    return Xi + h**2 * norm.ppf(x)