def plot_skewed_data(data):
    """Plot a normalized histogram of ``data`` with a fitted skew-normal PDF.

    A skew-normal distribution is fitted to ``data`` with ``skewnorm.fit``.
    The figure shows the fitted PDF, a 20-bin density histogram of the
    samples, and vertical markers at the empirical 1st, 50th and 99th
    percentiles.  The 1 % and 99 % quantiles of the *fitted* distribution
    are printed, and the figure is displayed with ``plt.show()``.

    Parameters
    ----------
    data : array-like
        Samples to fit and plot (assumed one-dimensional and non-empty).
    """
    # Evaluate the fitted PDF on a fine grid spanning the data range.
    num_bins = 200
    bin_edges = np.linspace(np.min(data), np.max(data), num_bins)
    bin_centers = 0.5 * (bin_edges[1:] + bin_edges[:-1])

    # Fit the skew-normal and evaluate its PDF on the grid.
    a, loc, scale = skewnorm.fit(data)
    pdf = skewnorm.pdf(bin_centers, a, loc, scale)

    fig, ax = plt.subplots(1, 1)
    ax.plot(bin_centers, pdf, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
    # ``density`` replaces the ``normed`` keyword, which Matplotlib removed.
    ax.hist(data, density=True, edgecolor='black', linewidth=1.2, alpha=0.5,
            bins=20, stacked=True, color='g',
            label="Histogram of samples (normalized)")

    # The 50th percentile is the median, not the mean.
    for percentile, label in ((1, '1 pc'), (50, 'median'), (99, '99 pc')):
        ax.axvline(x=np.percentile(data, percentile), color='red', ls=':',
                   lw=2, label=label)

    # Report the quantiles of the fitted distribution (including loc/scale),
    # so they are comparable with the empirical percentile markers.
    print("%f :: %f" % (skewnorm.ppf(0.01, a, loc, scale),
                        skewnorm.ppf(0.99, a, loc, scale)))
    ax.legend(loc='best', frameon=True)
    plt.show()
Exemplo n.º 2
0
    def fake_lnlike(p):
        """Return the log of the summed squared quantile residuals for ``p``.

        ``p`` unpacks to ``(sigma, omega, alpha)``, which are used as the
        skew-normal ``loc``, ``scale`` and shape parameter respectively.
        The 50 %, 16 % and 84 % quantiles are compared with ``median``,
        ``median - lower_err`` and ``median + upper_err``; those three names
        are taken from the enclosing scope.
        """
        location, width, shape = p
        targets = (
            (0.5, median),
            (0.16, median - lower_err),
            (0.84, median + upper_err),
        )
        squared = [
            (skewnorm.ppf(q, shape, loc=location, scale=width) - wanted) ** 2
            for q, wanted in targets
        ]
        return np.log(squared[0] + squared[1] + squared[2])
def update(val):
    """Redraw both sample curves and the test summary from the widget values.

    Reads ``.val`` from the module-level ``skew``, ``mathematic_expectation``,
    ``standard_deviation`` and ``sample_n`` objects, draws two inverse-CDF
    samples, and pushes binned counts into the module-level lines ``l`` and
    ``l1`` and the text artist ``te``.  ``val`` itself is not used.
    """
    n_samples = int(sample_n.val)
    shape = skew.val
    location = mathematic_expectation.val
    spread = standard_deviation.val

    # Sample 1: skew-normal driven by the widget values.
    uniform_a = np.random.uniform(0.0, 1.0, n_samples)
    skewed = skewnorm.ppf(uniform_a, shape, location, spread)
    skewed_x, skewed_counts = np.unique(np.around(skewed, decimals=1),
                                        return_counts=True)

    # Sample 2: normal using the module-level ``mu`` / ``sigma``.
    # NOTE(review): these are not the widget values read above - confirm
    # that a fixed reference distribution is intended.
    uniform_b = np.random.uniform(0.0, 1.0, n_samples)
    reference = norm.ppf(uniform_b, mu, sigma)
    reference_x, reference_counts = np.unique(
        np.around(reference, decimals=1), return_counts=True)

    t_stat, t_p = np.around(ttest_ind(skewed, reference), decimals=3)
    u_stat, u_p = np.around(mannwhitneyu(skewed, reference), decimals=3)

    l.set_xdata(skewed_x)
    l.set_ydata(skewed_counts)
    l1.set_xdata(reference_x)
    l1.set_ydata(reference_counts)
    summary = "T-test: t = {}, p = {}; Mann-Whitney: U = {}, p = {}"
    te.set_text(summary.format(t_stat, t_p, u_stat, u_p))
    fig.canvas.draw_idle()
Exemplo n.º 4
0
    def __init__(self,N,init_range,random_ratio,noisetype=None,R=None,Q=None):
        """Create ``N`` uniformly weighted particles inside ``init_range``.

        ``init_range`` is a sequence of ``(low, high)`` pairs, one per state
        dimension; each column of ``self.X`` is drawn uniformly from its pair.
        When ``noisetype == 'skewnorm'``, ``Q`` is read as
        ``(shape, loc, scale)`` and the mode of that skew-normal is estimated
        on a 100-point grid and stored in ``self.sensingNoiseMode``.
        ``R`` is accepted but not used here.
        """
        self.N = N
        self.init_range = init_range
        self.random_ratio = random_ratio
        self.noisetype = noisetype
        self.W = np.ones(N)/N
        self.X = np.zeros((N,len(init_range)))

        if noisetype == 'skewnorm':
            self.Q = Q
            shape, loc, scale = Q[0], Q[1], Q[2]
            # Grid between the 1 % and 99 % quantiles; the mode is taken as
            # the grid point with the highest density.
            grid = np.linspace(skewnorm.ppf(0.01, shape, loc, scale),
                               skewnorm.ppf(0.99, shape, loc, scale), 100)
            density = skewnorm.pdf(grid, shape, loc, scale)
            self.sensingNoiseMode = grid[np.argmax(density)]

        for column, bounds in enumerate(init_range):
            self.X[:,column] = np.random.uniform(bounds[0],bounds[1],N)
Exemplo n.º 5
0
def estimate_distance_cutoff(locs, data_norm, tissue_mat, cutoff=0.0001):
    '''
    Estimate distance cutoffs from shuffled copies of ``tissue_mat``.

    Ten times, two independently shuffled copies of the matrix are compared
    row against row with three measures: ``compute_diff_vs_common_const10``,
    ``jaccard_dist`` and ``compute_hausdorff`` (the last on the rows of
    ``locs`` where the shuffled row equals 1).  A skew-normal is fitted to
    each pooled set of distances.

    Returns ``(hamming_cutoff, jaccard_cutoff, hausdorff_cutoff)``: the
    ``cutoff`` quantile of the first two fits and the ``1 - cutoff``
    quantile of the Hausdorff fit.  ``data_norm`` is not used.
    '''
    target_mat = np.asarray(tissue_mat)
    n_rows = target_mat.shape[0]

    hamming_null = []
    jaccard_null = []
    hausdorff_null = []
    for _ in range(10):
        shuffled_a = shuffle(target_mat.T).T
        shuffled_b = shuffle(target_mat.T).T

        hamming_null.extend(
            cdist(shuffled_a, shuffled_b,
                  compute_diff_vs_common_const10).flatten())
        jaccard_null.extend(
            cdist(shuffled_a, shuffled_b, jaccard_dist).flatten())

        for row_a in range(n_rows):
            for row_b in range(n_rows):
                locs_a = locs[shuffled_a[row_a] == 1]
                locs_b = locs[shuffled_b[row_b] == 1]
                hausdorff_null.append(compute_hausdorff(locs_a, locs_b))

    shape, loc, scale = stats.skewnorm.fit(hamming_null)
    hamming_cutoff = skewnorm.ppf(cutoff, shape, loc, scale)

    shape, loc, scale = stats.skewnorm.fit(jaccard_null)
    jaccard_cutoff = skewnorm.ppf(cutoff, shape, loc, scale)

    # Upper tail for Hausdorff, lower tail for the other two measures.
    shape, loc, scale = stats.skewnorm.fit(hausdorff_null)
    hausdorff_cutoff = skewnorm.ppf(1 - cutoff, shape, loc, scale)

    return hamming_cutoff, jaccard_cutoff, hausdorff_cutoff
Exemplo n.º 6
0
def cal_tot_sf(SFR, SFEN):
    """Sample a skew-normal star-formation history and sum it.

    The PDF is evaluated at ``SFEN`` evenly spaced points between the 1 %
    and 99 % quantiles of a skew-normal whose shape and location come from
    the module-level ``skewness`` and ``location`` (scale fixed at 1).  The
    samples are rescaled so that the peak equals ``10**SFR``.

    Returns ``(tot_sf, mult, y)``: the sum of the rescaled samples, the
    rescaling factor, and the unscaled PDF samples.
    """
    import numpy as np
    from scipy.stats import skewnorm

    # ``skewness`` and ``location`` are only read, never rebound.
    dist_args = (skewness, location, 1)
    grid = np.linspace(skewnorm.ppf(0.01, *dist_args),
                       skewnorm.ppf(0.99, *dist_args), SFEN)
    y = skewnorm.pdf(grid, *dist_args)

    # The scale parameter stays at 1: SFR and SFEN set the overall scale.
    mult = 10**SFR / max(y)

    tot_sf = 0
    for density in y:
        tot_sf += mult * density
    return tot_sf, mult, y
Exemplo n.º 7
0
def simulate_PDF(median, lower_err, upper_err, size=1, plot=True):
    '''
    Draw samples from a skew-normal matched to a value with asymmetric errors.

    Intended for literature values that were reported as
    ``median -lower_err +upper_err`` without the underlying MCMC chains.

    Inputs:
    -------
    median : float
        the median value that was reported
    lower_err : float
        the lower errorbar that was reported
    upper_err : float
        the upper errorbar that was reported
    size : int
        the number of samples to be drawn
    plot : bool
        if it compares equal to False, only the samples are returned;
        otherwise a diagnostic figure is drawn as well

    Returns:
    --------
    samples : array of float
        the samples drawn from the simulated skew-normal distribution
    fig : matplotlib figure
        only returned (as ``(samples, fig)``) when plotting is enabled
    '''
    sigma, omega, alpha = calculate_skewed_normal_params(
        median, lower_err, upper_err)
    samples = skewnorm.rvs(alpha, loc=sigma, scale=omega, size=size)

    # Equality test kept on purpose: ``plot=None`` still plots.
    if plot == False:
        return samples

    lower_err = np.abs(lower_err)
    upper_err = np.abs(upper_err)
    grid = np.arange(median - 4 * lower_err, median + 4 * upper_err, 0.01)

    fig = plt.figure()
    # Black lines: the reported value and its error bounds.
    for reported in (median - lower_err, median, median + upper_err):
        plt.axvline(reported, color='k', lw=2)
    plt.plot(grid, skewnorm.pdf(grid, alpha, loc=sigma, scale=omega),
             'r-', lw=2)

    # Dashed red lines: the same three quantiles of the fitted distribution.
    fitted_quantiles = skewnorm.ppf([0.16, 0.5, 0.84], alpha, loc=sigma,
                                    scale=omega)
    for fitted in fitted_quantiles:
        plt.axvline(fitted, color='r', ls='--', lw=2)
    plt.hist(samples, density=True, color='red', alpha=0.5)
    return samples, fig
Exemplo n.º 8
0
    def calc_risk_skewnorm(self, confidence=0.95):
        """Estimate VaR and CVaR from a skew-normal fitted to the losses.

        Losses are the negated first column of
        ``self.returns.dot(self.allocation)``.  VaR is the ``confidence``
        quantile of the fitted distribution; CVaR is the partial expectation
        of the loss above VaR divided by ``1 - confidence``.

        Returns ``(losses, VaR, CVaR)``.
        """
        portfolio = self.returns.dot(self.allocation)
        losses = -portfolio.iloc[:, 0]

        shape, loc, scale = skewnorm.fit(losses)
        VaR = skewnorm.ppf(confidence, shape, loc, scale)

        # Integral of y * pdf(y) over [VaR, inf).
        tail_loss = skewnorm.expect(lambda y: y, args=(shape, ), loc=loc,
                                    scale=scale, lb=VaR)
        tail_weight = 1 / (1 - confidence)
        CVaR = tail_weight * tail_loss

        return losses, VaR, CVaR
Exemplo n.º 9
0
def spx_implied_var_single(rolling_window,
                           var_pct,
                           vix,
                           skew,
                           spx,
                           option='P'):
    """Suggest an SPX strike from a VaR quantile of the log return.

    ``vix`` is converted to a per-period volatility and scaled by
    ``sqrt(rolling_window)``.  For ``option == 'C'`` the ``1 - var_pct``
    quantile of a zero-mean normal is used; for any other value the
    ``var_pct`` quantile of a zero-location skew-normal with shape
    ``-(skew - 100) / 10`` is used.  The strike is ``spx * exp(quantile)``.

    Prints the quantile (in percent) and the floored strike, and returns the
    unrounded strike.
    """
    alpha = -(skew - 100) / 10
    daily_vol = np.sqrt(((vix * vix) / 365) * 1.5) / 100
    period_vix = daily_vol * np.sqrt(rolling_window)

    if option == 'C':
        # Calls look at the upper tail of a plain normal.
        pct_var = norm.ppf(1 - var_pct, 0, period_vix)
    else:
        pct_var = skn.ppf(var_pct, alpha, 0, period_vix)

    # Log-return convention: level = spot * exp(return).
    spx_k_suggestion = spx * np.exp(pct_var)
    pct_text = str(round(pct_var * 100, 2))
    strike_text = str(np.floor(spx_k_suggestion))
    print('VaR return percent for SPX is: ' + pct_text)
    print('Suggested SPX strike: ' + strike_text)

    return spx_k_suggestion
Exemplo n.º 10
0
def spx_implied_var(rolling_window, var_pct, mkt_time='Close', option='P'):
    """Backtest the implied-VaR strike against realized SPX moves.

    Reads the module-level DataFrame ``df`` and, for every row, derives a
    VaR log return over ``rolling_window`` rows and the matching SPX level.
    The rows where the realized SPX level breached that level are plotted
    and summarised on stdout.

    Parameters
    ----------
    rolling_window : int
        Number of rows to look ahead; also used to scale the daily VIX by
        ``sqrt(rolling_window)``.
    var_pct : float
        Tail probability of the VaR quantile.
    mkt_time : str
        ``'Open'`` uses the 'SPX Open' / 'Daily VIX Open' columns as the
        starting point; any other value uses the close columns.
    option : str
        ``'C'`` uses the ``1 - var_pct`` quantile of a normal distribution
        and treats a realized level above the VaR level as a breach; any
        other value uses a skew-normal with the 'skew' column as shape and
        treats a realized level below the VaR level as a breach.

    Returns
    -------
    pandas.DataFrame
        Columns 'spx', 'spx_shift', 'var_pct', 'var_spx_lvl',
        'actual_to_var_diff', 'VIX Close' and 'vix_shift' for every row.
    """
    # Work on an explicit copy: adding columns to a bare ``df[[...]]``
    # selection raises SettingWithCopyWarning and leaves it ambiguous
    # whether ``df`` itself is modified.
    if mkt_time == 'Open':
        # Use the market Open values so that the worst case runs from
        # market open on trade date to market close on expiry.
        temp_df = df[[
            'SPX Open', 'SPX Close', 'skew', 'Daily VIX Open',
            'Daily VIX Close', 'VIX Close'
        ]].copy()
        temp_df['spx_shift'] = temp_df['SPX Close'].shift(-rolling_window)
        temp_df['vix_shift'] = temp_df['VIX Close'].shift(-rolling_window)
        temp_df = temp_df.drop(columns=['SPX Close', 'Daily VIX Close'])
        temp_df.columns = [
            'spx', 'skew', 'vix', 'VIX Close', 'spx_shift', 'vix_shift'
        ]
    else:
        # Usual close-to-close calculation.
        temp_df = df[['SPX Close', 'skew', 'Daily VIX Close',
                      'VIX Close']].copy()
        temp_df.columns = ['spx', 'skew', 'vix', 'VIX Close']
        temp_df['spx_shift'] = temp_df['spx'].shift(-rolling_window)
        temp_df['vix_shift'] = temp_df['VIX Close'].shift(-rolling_window)

    # Scale the daily vix to the time-span given by rolling_window, e.g.
    # for a DTE of 5 days the daily vix is scaled by sqrt(5).
    temp_df['period_vix'] = temp_df['vix'] * np.sqrt(rolling_window)

    # Log returns are modelled with zero location.  Calls use a normal
    # distribution; puts use a skew-normal whose shape is the 'skew' column
    # and whose scale is the period vix.
    if option == 'C':
        var_pct = 1 - var_pct
        temp_df['var_pct'] = norm.ppf(var_pct, 0, temp_df['period_vix'])
    else:
        temp_df['var_pct'] = skn.ppf(var_pct, temp_df['skew'], 0,
                                     temp_df['period_vix'])

    # SPX level corresponding to the VaR log return (strike suggestion).
    temp_df['var_spx_lvl'] = temp_df['spx'] * np.exp(temp_df['var_pct'])

    # Relative gap between the realized SPX level and the VaR level.
    # Only meaningful after filtering on breaches.
    temp_df['actual_to_var_diff'] = (
        temp_df['spx_shift'] / temp_df['var_spx_lvl'] - 1)

    # Realized SPX return over rolling_window.
    temp_df['actual_spx_return'] = temp_df['spx_shift'] / temp_df['spx'] - 1

    if option == 'C':
        plot_df = temp_df[temp_df['var_spx_lvl'] < temp_df['spx_shift']]
    else:
        plot_df = temp_df[temp_df['var_spx_lvl'] > temp_df['spx_shift']]

    fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(20, 10))
    plot_df[['var_pct', 'actual_spx_return']].plot(ax=axes[0, 0])
    plot_df['actual_spx_return'].plot(ax=axes[1, 0])
    plot_df['actual_to_var_diff'].hist(ax=axes[0, 1])
    plot_df['VIX Close'].hist(ax=axes[1, 1])
    axes[0, 0].set_title('Implied VaR Returns that Breached')
    axes[1, 0].set_title('Actual SPX Returns for Breach')
    axes[0, 1].set_title('Distribution of Breach Percentage')
    axes[1, 1].set_title('Distribution of VIX Close on Trade Day')

    # Guard against a frame with no complete rows instead of raising
    # ZeroDivisionError; the probability is then reported as nan.
    complete_rows = len(temp_df.dropna())
    if complete_rows:
        historical_prob_of_breach = 100 * len(plot_df) / float(complete_rows)
    else:
        historical_prob_of_breach = float('nan')
    print(
        "--------------------------------------------------------------------")
    print("")
    print("The historical probability of breaching is " +
          str(round(historical_prob_of_breach, 2)) + "%")
    print("With the total occurences being " + str(len(plot_df)) + " times")

    # Worst breaches first: largest overshoot for calls, largest
    # undershoot for puts.
    plot_df = plot_df.sort_values(by='actual_to_var_diff',
                                  ascending=(option != 'C'))
    print("With the worst 5 cases as follows:")
    print(plot_df.head())
    print("")
    print(
        "--------------------------------------------------------------------")
    print("")
    print("The latest SPX level and suggested strike is:")
    print(temp_df[['spx', 'VIX Close', 'skew', 'var_spx_lvl']].tail(3))

    return temp_df[[
        'spx', 'spx_shift', 'var_pct', 'var_spx_lvl', 'actual_to_var_diff',
        'VIX Close', 'vix_shift'
    ]]
Exemplo n.º 11
0
 def bestNstep(self, targetAccRate):
     r"""!Return the nstep (float) at which the fitted skew-normal CDF
     reaches the given target acceptance rate.

     The skew-normal parameters are taken from `self.bestFit`.
     """
     fitted_parameters = self.bestFit
     return skewnorm.ppf(targetAccRate, *fitted_parameters)
Exemplo n.º 12
0
def skew_gauss_ppf(q, A, mu, std, a):
    """Return the ``q`` quantile of a skew-normal distribution.

    ``a`` is the shape parameter, ``mu`` the location and ``std`` the scale.
    ``A`` is accepted but does not affect the result.
    """
    placement = {"loc": mu, "scale": std}
    return skewnorm.ppf(q, a, **placement)
Exemplo n.º 13
0
# Demo: draw skew-normal noise, estimate the mode of the density on a grid,
# and plot the PDF and a sample histogram shifted so that the mode sits at 0.
a = -10  # shape parameter passed to skewnorm
loc = 0  # location parameter
w = 0.5  # scale parameter

median = skewnorm.median(a, loc, w)
s = skewnorm.rvs(a, loc, w, 1000)  # 1000 random samples
# NOTE(review): m0 is defined elsewhere; this value is overwritten below by
# the grid estimate before it is used in this snippet.
mode = loc + w * m0(a)

plt.ion()
ax = plt.subplot(111)

print(type(s))
print(s.shape)

# Grid of 100 points between the 1 % and 99 % quantiles.
x = np.linspace(skewnorm.ppf(0.01, a, loc, w), skewnorm.ppf(0.99, a, loc, w),
                100)
y = skewnorm.pdf(x, a, loc, w)
# Mode estimate: the grid point with the highest density.
mode = x[np.argmax(y)]

# Shift both the grid and the samples so that the mode is at zero.
x = x - mode
s = s - mode

ax.plot(x, y, 'r-', lw=5, alpha=0.6, label='skewnorm pdf')
ax.axvline(x=0, color='y', label='mode')
ax.hist(s, density=True)
ax.set_xlabel('noise')
ax.set_ylabel('Number of samples/PDF')
ax.set_title('Skewnormal histogram, shape = %.1f, loc = %.1f, w = %.1f' %
             (a, loc, w))
# Not used within the lines shown here.
text = 'Shift all noise by mode = %.3f' % mode
Exemplo n.º 14
0
 def transform(self, x):
     """Map ``x`` through the fitted skew-normal CDF onto the standard
     skew-normal with shape 0.

     The fitted parameters are read from ``self._a``, ``self._loc`` and
     ``self._scale``.
     """
     fitted = dict(a=self._a, loc=self._loc, scale=self._scale)
     target = dict(a=0, loc=0, scale=1)
     probability = skewnorm.cdf(x=x, **fitted)
     return skewnorm.ppf(q=probability, **target)
Exemplo n.º 15
0
def skew_examples():
    """Draw left-skewed, unskewed and right-skewed example densities.

    Each skewed panel is annotated with its mode, median and mean; the
    middle panel carries a single combined annotation.  Returns the array of
    three axes.
    """

    def mark_statistic(panel, position, label, label_x, label_y, upper,
                       lower):
        # Two line segments with a gap in between for the rotated label.
        panel.axvline(position, upper[0], upper[1], color='orange')
        panel.text(s=label, x=label_x, y=label_y, rotation=90)
        panel.axvline(position, lower[0], lower[1], color='orange')

    fig, ax = plt.subplots(1, 3, figsize=(20, 4))
    left_panel, centre_panel, right_panel = ax

    # Shape parameter shared by both skewed panels.
    a = 4
    mean_skew_val = skewnorm.mean(a)
    median_skew_val = skewnorm.median(a)

    # x range between the 0.1 % and 99.9 % quantiles.
    x = np.linspace(skewnorm.ppf(0.001, a), skewnorm.ppf(0.999, a), 100)
    density = skewnorm.pdf(x, a)

    # Left skew: the right-skewed curve mirrored about zero.
    left_panel.plot(x * -1, density)
    left_panel.set_title('Left/Negative Skewed')
    mark_statistic(left_panel, -0.42, 'mode', -0.49, 0.5,
                   (0.72, 0.925), (0, 0.53))
    mark_statistic(left_panel, median_skew_val * -1, 'median', -0.74, 0.35,
                   (0.52, 0.83), (0, 0.3))
    mark_statistic(left_panel, mean_skew_val * -1, 'mean', -0.84, 0.16,
                   (0.26, 0.77), (0, 0.09))

    # No skew: a normal centred on the middle of the x range.
    centre = x.mean()
    centre_panel.plot(x, norm.pdf(x, loc=centre, scale=0.56))
    centre_panel.set_title('No Skew')
    centre_panel.text(s='  mean\nmedian\n  mode', x=centre - 0.25, y=0.25)
    centre_panel.axvline(centre, 0.5, 0.94, color='orange')
    centre_panel.axvline(centre, 0, 0.3, color='orange')

    # Right skew.
    right_panel.plot(x, skewnorm.pdf(x, a))
    right_panel.set_title('Right/Positive Skewed')
    mark_statistic(right_panel, 0.42, 'mode', 0.35, 0.5,
                   (0.72, 0.925), (0, 0.53))
    mark_statistic(right_panel, median_skew_val, 'median', 0.6, 0.35,
                   (0.52, 0.83), (0, 0.3))
    mark_statistic(right_panel, mean_skew_val, 'mean', 0.72, 0.16,
                   (0.26, 0.77), (0, 0.09))

    # Common axis labels and y-limits.
    for panel in ax:
        panel.set_xlabel('x')
        panel.set_ylabel('f(x)')
        panel.set_ylim(0, 0.75)

    return ax
Exemplo n.º 16
0
import numpy as np
import scipy.stats
from scipy.stats import skewnorm
import matplotlib.pyplot as plt

# Skewness: skew-normal PDFs with shape -3, 0 and 3, each drawn over its own
# 1 %-99 % quantile range.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 2.5))
x1 = np.linspace(skewnorm.ppf(0.01, -3), skewnorm.ppf(0.99, -3), 100)
x2 = np.linspace(skewnorm.ppf(0.01, 0), skewnorm.ppf(0.99, 0), 100)
x3 = np.linspace(skewnorm.ppf(0.01, 3), skewnorm.ppf(0.99, 3), 100)
# Plot against the x grid so the horizontal axis shows values, not indices.
ax1.plot(x1, skewnorm(-3).pdf(x1), 'k-', lw=4)
ax2.plot(x2, skewnorm(0).pdf(x2), 'k-', lw=4)
ax3.plot(x3, skewnorm(3).pdf(x3), 'k-', lw=4)

# Kurtosis: histograms of samples from three distributions.
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 2.5))
axs = [ax1, ax2, ax3]
# Same order as ``samples`` below; not applied to the axes in this snippet.
Titles = ["Mesokurtic", "Leptokurtic", "Platykurtic"]

# Normal distribution
dist = scipy.stats.norm(loc=100, scale=5)
sample_norm = dist.rvs(size=10000)
# Laplace distribution
dist2 = scipy.stats.laplace(loc=100, scale=5)
sample_laplace = dist2.rvs(size=10000)
# Cosine distribution
dist3 = scipy.stats.cosine(loc=100, scale=5)
sample_cosine = dist3.rvs(size=10000)

samples = [sample_norm, sample_laplace, sample_cosine]

# ``density`` replaces the ``normed`` keyword, which Matplotlib removed.
for axis, sample in zip(axs, samples):
    axis.hist(sample, bins='auto', density=True)
Exemplo n.º 17
0
# Fragment: sym_data and sym_density are defined before the lines shown here.
# NOTE(review): the mode/mean/median below are taken over the density values,
# not over the x positions - confirm that this is intended.
sym_mode = stats.mode(sym_density)
sym_mean = np.mean(sym_density)
sym_median = np.median(sym_density)

plt.subplot(221)
"""
plt.vlines(sym_mean, 0, 0.7, colors='k')
plt.vlines(sym_median, 0, 0.7, colors='g')
plt.vlines(sym_mode[0], 0, 0.7, colors='m')
"""
plt.plot(sym_data, sym_density, 'r-', label='skewnorm pdf')

#mean, var, skew, kurt = skewnorm.stats(a, moments='mvsk')
"""positively skewed dataset"""
a = 2  # shape parameter for the positively skewed curve
# Grid from the 10 % to the 99 % quantile (asymmetric bounds as written).
pos_skewed = np.linspace(skewnorm.ppf(0.1, a), skewnorm.ppf(0.99, a), 100)
#pos_skewed = np.random.exponential(size=100)
#pos_density =expon.pdf(pos_skewed)
pos_density = skewnorm.pdf(pos_skewed, a)
# As above, these statistics are computed over the density values.
pos_mode = stats.mode(pos_density)
pos_mean = np.mean(pos_density)
pos_median = np.median(pos_density)
plt.subplot(222)
"""
plt.vlines(pos_mean, 0, 0.7, colors='k')
plt.vlines(pos_median, 0, 0.7, colors='g')
plt.vlines(pos_mode[0], 0, 0.7, colors='m')
"""
plt.plot(pos_skewed, pos_density, 'r-', label='skewnorm pdf')
"""negatively skewed dataset"""
a = -4  # shape parameter for the negatively skewed case
Exemplo n.º 18
0
import numpy as np
from scipy.stats import skewnorm
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:

a = 4
mean, var, skew, kurt = skewnorm.stats(a, moments='mvsk')

# Display the probability density function (``pdf``):

x = np.linspace(skewnorm.ppf(0.01, a),
                skewnorm.ppf(0.99, a), 100)
ax.plot(x, skewnorm.pdf(x, a),
       'r-', lw=5, alpha=0.6, label='skewnorm pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:

rv = skewnorm(a)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:

vals = skewnorm.ppf([0.001, 0.5, 0.999], a)
np.allclose([0.001, 0.5, 0.999], skewnorm.cdf(vals, a))
# True
Exemplo n.º 19
0
# https://stackoverflow.com/questions/66986076/matplotlib-time-on-x-axis-from-datetime-json

from scipy.stats import skewnorm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

# Plot a skew-normal PDF against an evenly spaced datetime axis.
fig, ax = plt.subplots(1, 1)
a = 4  # skewnorm shape parameter
# 50 x values between the 1 % and 99 % quantiles ...
x = np.linspace(skewnorm.ppf(0.01, a), skewnorm.ppf(0.99, a), 50)
# ... paired one-to-one with 50 timestamps spanning four hours.
d = pd.date_range("2021-04-06 12:00:00", "2021-04-06 16:00:00", 50)

ax.plot(d, skewnorm.pdf(x, a), 'b-', label='skewnorm pdf')

# from matplotlib.dates import AutoDateFormatter, AutoDateLocator
# xtick_locator = AutoDateLocator(minticks=3, maxticks=15)
# xtick_formatter = AutoDateFormatter(xtick_locator)
# ax.xaxis.set_major_locator(xtick_locator)
# ax.xaxis.set_major_formatter(xtick_formatter)
ax.xaxis.set_major_formatter(DateFormatter('%Y-%m-%d %H:%M:%S'))

fig.autofmt_xdate()
plt.show()

# Used this code sample as a reference: https://matplotlib.org/stable/gallery/text_labels_and_annotations/date.html
# Found out how to use scipy.skewnorm from here: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.skewnorm.html. Still not sure about all the values, though.
# See autofmt_xdate(): https://matplotlib.org/stable/api/figure_api.html?highlight=autofmt#matplotlib.figure.Figure.autofmt_xdate
# pandas.date_range() is pretty useful here. Found out about this from https://jakevdp.github.io/PythonDataScienceHandbook/03.11-working-with-time-series.html
Exemplo n.º 20
0
 def inverse_transform(self, z, copy=None):
     """Map ``z`` from the standard skew-normal with shape 0 back through
     the fitted skew-normal quantile function.

     The fitted parameters are read from ``self._a``, ``self._loc`` and
     ``self._scale``.  ``copy`` is accepted but not used.
     """
     source = dict(a=0, loc=0, scale=1)
     fitted = dict(a=self._a, loc=self._loc, scale=self._scale)
     probability = skewnorm.cdf(x=z, **source)
     return skewnorm.ppf(q=probability, **fitted)
 def generic_dispersion(self, nd_dict, GH_dict=None):
     """Build the grid of dispersed parameter values and their weights.

     For each entry of ``self.simulation_options["dispersion_parameters"]``
     the matching entry of ``"dispersion_distributions"`` selects how the
     parameter is discretised into ``"dispersion_bins"[i]`` points:

     * ``"uniform"``: evenly spaced values between ``nd_dict[name +
       "_lower"]`` and ``nd_dict[name + "_upper"]`` with equal weights.
     * ``"normal"``: Gauss-Hermite nodes and weights when ``GH_dict`` is a
       dict (keys ``"nodes"`` and ``"normal_weights"``), otherwise
       tail-clipped bins using ``name + "_mean"`` / ``"_std"``.
     * ``"lognormal"``: tail-clipped bins using ``name + "_shape"`` /
       ``"_scale"`` with location 0.
     * ``"skewed_normal"``: tail-clipped bins using ``name + "_mean"`` /
       ``"_std"`` / ``"_skew"``.
     * ``"log_uniform"``: tail-clipped bins using ``name + "_loglower"`` /
       ``"_logupper"``.

     Parameters
     ----------
     nd_dict : dict
         Distribution parameters keyed by ``name + suffix`` as listed above.
     GH_dict : dict, optional
         Gauss-Hermite nodes and weights for the ``"normal"`` case.

     Returns
     -------
     tuple
         ``(sim_params, value_combinations, weight_combinations)``:
         the parameter names (with ``"E0"`` / ``"k0"`` renamed to ``"E_0"``
         / ``"k_0"``) and the Cartesian products of the per-parameter
         values and weights, as lists of tuples.
     """
     options = self.simulation_options

     def tail_clipped_bins(dist, shape_args, loc, scale, n_bins):
         """Discretise ``dist`` between its 1e-4 and 1 - 1e-4 quantiles.

         The first point is the 5e-5 quantile and carries the lower-tail
         mass; every later point is the midpoint of two neighbouring edges
         and carries the CDF difference between them.  The upper-tail mass
         beyond the last edge is not assigned to any point.
         """
         tail = 1e-4
         placement = {"loc": loc, "scale": scale}
         edges = np.linspace(dist.ppf(tail, *shape_args, **placement),
                             dist.ppf(1 - tail, *shape_args, **placement),
                             n_bins)
         cdf_at_edges = dist.cdf(edges, *shape_args, **placement)

         weights = np.zeros(n_bins)
         weights[0] = cdf_at_edges[0]
         weights[1:] = np.diff(cdf_at_edges)

         points = np.zeros(n_bins)
         points[0] = dist.ppf(tail / 2, *shape_args, **placement)
         points[1:] = (edges[:-1] + edges[1:]) / 2
         return points, weights

     weight_arrays = []
     value_arrays = []
     for i, name in enumerate(options["dispersion_parameters"]):
         distribution = options["dispersion_distributions"][i]
         n_bins = options["dispersion_bins"][i]

         if distribution == "uniform":
             # Bounds come from nd_dict like every other distribution; the
             # bare key strings cannot be used as linspace end points.
             values = np.linspace(nd_dict[name + "_lower"],
                                  nd_dict[name + "_upper"], n_bins)
             weights = [1 / n_bins] * n_bins
         elif distribution == "normal":
             param_mean = nd_dict[name + "_mean"]
             param_std = nd_dict[name + "_std"]
             if isinstance(GH_dict, dict):
                 # Gauss-Hermite nodes rescaled to the target normal.
                 values = [(param_std * math.sqrt(2) * node) + param_mean
                           for node in GH_dict["nodes"]]
                 weights = GH_dict["normal_weights"]
             else:
                 values, weights = tail_clipped_bins(
                     norm, (), param_mean, param_std, n_bins)
         elif distribution == "lognormal":
             param_shape = nd_dict[name + "_shape"]
             param_scale = nd_dict[name + "_scale"]
             print("shape, scale", param_shape, param_scale)
             values, weights = tail_clipped_bins(
                 lognorm, (param_shape,), 0, param_scale, n_bins)
         elif distribution == "skewed_normal":
             values, weights = tail_clipped_bins(
                 skewnorm, (nd_dict[name + "_skew"],),
                 nd_dict[name + "_mean"], nd_dict[name + "_std"], n_bins)
         elif distribution == "log_uniform":
             values, weights = tail_clipped_bins(
                 loguniform,
                 (nd_dict[name + "_loglower"], nd_dict[name + "_logupper"]),
                 0, 1, n_bins)
         else:
             # NOTE(review): unknown distribution names are skipped, as in
             # the original code, which leaves the returned combinations
             # with fewer columns than sim_params - confirm this is wanted.
             continue
         value_arrays.append(values)
         weight_arrays.append(weights)

     weight_combinations = list(itertools.product(*weight_arrays))
     value_combinations = list(itertools.product(*value_arrays))

     # Rename on a new list so simulation_options is left untouched.
     renamed = {"E0": "E_0", "k0": "k_0"}
     sim_params = [renamed.get(param, param)
                   for param in options["dispersion_parameters"]]
     return sim_params, value_combinations, weight_combinations
Exemplo n.º 22
0
def _fit_capped_skewnorm(samples):
    """Fit a skew-normal to ``samples`` with the shape capped at TH_SKEWNESS.

    A single sample is not fitted; it is represented by shape
    ``TH_SKEWNESS``, location at the sample itself and scale ``MIN_SCALE``.

    Returns:
        ``(a, loc, scale)`` in the order returned by ``skewnorm.fit``.
    """
    if samples.size == 1:
        return TH_SKEWNESS, samples.mean(), MIN_SCALE
    a, loc, scale = skewnorm.fit(samples)
    if a > TH_SKEWNESS:
        # Refit with the shape parameter frozen at the cap.
        a, loc, scale = skewnorm.fit(samples, f0=TH_SKEWNESS)
    return a, loc, scale


def sep_two_skewed_normals(x, th_init):
    """Find a threshold separating ``x`` into two skew-normal clusters.

    Starting from ``th_init``, the samples below the threshold (cluster 0)
    and at/above it (cluster 1) are each fitted with a skew-normal. The
    threshold is then moved to the point where the two fitted densities
    cross, and the procedure repeats until the size of cluster 0 takes a
    value that has already been seen.

    Args:
        x: numpy array of samples (presumably 1-D; it is indexed with
            boolean masks and sorted element-wise).
        th_init: initial threshold between the two clusters.

    Returns:
        ``(th, (m0, m1, s0, s1, a0, a1))``: the final threshold and the
        location, scale and shape parameters of cluster 0 and cluster 1.
        If no sample lies below ``th_init``, ``th_init`` is returned
        together with placeholder parameters derived from ``x`` alone.

    Note:
        An initial split that leaves cluster 1 empty is not handled; the
        empty array is passed straight to ``skewnorm.fit``.
    """
    x0 = x[x < th_init]
    x1 = x[x >= th_init]

    if x0.size == 0:
        return th_init, (x.min() - 1, x1.mean(), 0.01, x1.std(), 0, 0)

    # The helper places a single-sample cluster on its own sample. The
    # previous code used x0.mean() for a single-sample upper cluster, which
    # located cluster 1 on top of cluster 0.
    a1, m1, s1 = _fit_capped_skewnorm(x1)
    a0, m0, s0 = _fit_capped_skewnorm(x0)

    x_sorted = sorted(x)
    last_index = x.size - 1
    seen_sizes = {x0.size}
    while True:
        # E-step: bisect between the middle of each cluster for the index
        # where the cluster-0 density stops exceeding the cluster-1 density.
        i0 = x0.size // 2
        i1 = x.size - x1.size // 2
        while i1 - i0 > 1:
            i = (i0 + i1) // 2
            p0 = (skewnorm.pdf(x_sorted[i], a0, m0, s0)
                  - skewnorm.pdf(x_sorted[i], a1, m1, s1))
            if p0 > 0:
                i0 = i
            else:
                i1 = i

        # i1 starts at x.size when cluster 1 has fewer than two samples and
        # may never be lowered; clamp it so the lookup stays in range.
        th = (x_sorted[i0] + x_sorted[min(i1, last_index)]) / 2

        x0 = x[x < th]
        x1 = x[x >= th]

        # M-step: refit both clusters on the new split. If cluster 0 became
        # empty, keep the parameters from the previous iteration.
        if x0.size == 0:
            break
        a1, m1, s1 = _fit_capped_skewnorm(x1)
        a0, m0, s0 = _fit_capped_skewnorm(x0)

        # Stop once a cluster-0 size repeats (covers both "no change" and
        # oscillation between previously visited splits).
        if x0.size in seen_sizes:
            break
        seen_sizes.add(x0.size)

    th = min(skewnorm.ppf(TH_SKEWNORM_PPF, a0, m0, s0), th)
    # extreme case that under very weak L1 constraint, negligible cluster is fitted with large sigma
    if s1 > 0.1 and s0 / s1 > 10:
        th = min(skewnorm.ppf(1e-4, a1, m1, s1), th)
    return th, (m0, m1, s0, s1, a0, a1)
Exemplo n.º 23
0
    # Left column: standard normal. Blue circles are the scipy reference,
    # red dots the custom implementation. Titles are raw strings so the
    # mathtext backslashes are not treated as (invalid) escape sequences.
    ax[0][0].plot(x, norm.pdf(x, loc=0, scale=1), 'bo', alpha=0.6, label='norm pdf (scipy)')
    ax[0][0].plot(x, normal_pdf(x, mu=0, sigma=1), 'r.', alpha=0.6, label='norm pdf (custom)')
    ax[0][0].legend(loc='best', frameon=False)
    ax[0][0].set_title(r"Normal PDF Comparison ($\mu = 0$ $\sigma = 1$)")

    ax[1][0].plot(x, norm.cdf(x), 'bo', lw=5, alpha=0.6, label='norm cdf (scipy)')
    ax[1][0].plot(x, normal_cdf(x, mu=0, sigma=1), 'r.', lw=5, alpha=0.6, label='norm cdf (custom)')
    ax[1][0].legend(loc='best', frameon=False)
    ax[1][0].set_title(r"Normal CDF Comparison ($\mu = 0$ $\sigma = 1$)")

    # Only the scipy PPF is plotted; there is no custom counterpart here.
    ax[2][0].plot(y, norm.ppf(y), 'bo', lw=5, alpha=0.6, label='norm ppf (scipy)')
    ax[2][0].legend(loc='best', frameon=False)
    ax[2][0].set_title(r"Normal PPF Comparison ($\mu = 0$ $\sigma = 1$)")

    # Right column: skew-normal. Labels and titles name the function and
    # the shape parameter that each panel actually plots.
    ax[0][1].plot(x, skewnorm.pdf(x, loc=0, scale=1, a=4), 'bo', lw=5, alpha=0.6, label='skewnorm pdf (scipy)')
    ax[0][1].plot(x, skew_normal_pdf(x, epsilon=0, omega=1, alpha=4), 'r.', lw=5, alpha=0.6, label='skewnorm pdf (custom)')
    ax[0][1].legend(loc='best', frameon=False)
    ax[0][1].set_title(r"Skew-normal PDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = 4$)")

    ax[1][1].plot(x, skewnorm.cdf(x, loc=0, scale=1, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm cdf (scipy)')
    ax[1][1].plot(x, skew_normal_cdf(x, epsilon=0, omega=1, alpha=-4), 'r.', lw=5, alpha=0.6, label='skewnorm cdf (custom)')
    ax[1][1].legend(loc='best', frameon=False)
    ax[1][1].set_title(r"Skew-normal CDF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = -4$)")

    ax[2][1].plot(y, skewnorm.ppf(y, a=-4), 'bo', lw=5, alpha=0.6, label='skewnorm ppf (scipy)')
    ax[2][1].legend(loc='best', frameon=False)
    ax[2][1].set_title(r"Skew-normal PPF Comparison ($\mu = 0$ $\sigma = 1$ $\alpha = -4$)")

    plt.show()
#			 'conjunction':{'PoSTagging':0.0, 'is':0.1, 'used':0.0, 'to':0.5, 'annotate':0.0, 'complex':0.0, 'sentences':0.0, 'with':0.4,'PoS':0.0}
#			}

#Part of Speech Tagging - Named Entity Recognition
#observations=['PoSTagging', 'is', 'used', 'to', 'annotate', 'complex', 'sentences', 'with','PoS']
#observations=['Python','is','interpreted']

# Read the whitespace-separated observation tokens. The context manager
# closes the file handle as soon as the contents have been read.
with open("NamedEntityRecognition_HMMViterbi_CRF.observations", "r") as obs_file:
    observations = obs_file.read().split()

emission_probabilities = {}
# Number of states; used below to position the skew-normal (loc = cnt * 0.5).
cnt = len(states)

for s in states:
    emissionsdict = {}
    x = np.linspace(skewnorm.ppf(0.01, 5, cnt * 0.5, 1),
                    skewnorm.ppf(0.99, 5, cnt * 0.5, 1), len(observations))
    sknormpdf = skewnorm.pdf(x, 5, cnt * 0.5, 1)
    sknormpdf_weighted = []
    for p, q in zip(sknormpdf, x):
        sknormpdf_weighted.append(p * q * 0.01)
    print "skewnorm pdf weighted:", sknormpdf_weighted
    ax.plot(x,
            skewnorm.pdf(x, 5, cnt * 0.5, 1),
            'r-',
            lw=5,
            alpha=0.6,
            label='skewnorm pdf')
    obs_cnt = 0
    for o in observations:
        #print "emissiondict[o] = ",float(obs_cnt+cnt)/float(len(states) + len(observations))
## Null Hypothesis - No Skewness

# In[375]:

# Test the data for skewness
# `losses` is defined in an earlier cell; the null hypothesis stated above
# is that the losses have no skewness.
print("Skewtest result: ", skewtest(losses))

# In[376]:

# Fit the portfolio loss data to the skew-normal distribution
# `params` holds the fitted (shape, loc, scale) estimates as one tuple.
params = skewnorm.fit(losses)

# In[377]:

# Compute the 95% VaR from the fitted distribution, using parameter estimates
# i.e. the 0.95 quantile (ppf) of the fitted skew-normal; `*params` unpacks
# the fitted estimates in the order expected by `skewnorm.ppf`.
VaR_95 = skewnorm.ppf(0.95, *params)
print("VaR_95 from skew-normal: ", VaR_95)

# Losses are not normally distributed as the critical value exceeds the 99% confidence interval of the test statistic value
# Losses can be skewed
#
# Definition wiki - anderson
# In many cases (but not all), you can determine a p value for the Anderson-Darling statistic and use that value to help you
# determine if the test is significant or not. Remember the p ("probability") value is the probability of getting a result
# that is more extreme if the null hypothesis is true. If the p value is low (e.g., <=0.05), you conclude that the data do
# not follow the normal distribution. Remember that you chose the significance level even though many people just use 0.05
# the vast majority of the time. We will look at two different data sets and apply the Anderson-Darling test to both sets.
#
#

# Note that although the VaR estimate for the
Exemplo n.º 26
0
import numpy as np
from scipy.stats import skewnorm
from master import *

##start##
# Shape of the skew-normal pulse and number of time samples per path.
a = 2
steps = 1000
# NOTE(review): this relies on true division (Python 3); under Python 2
# without `from __future__ import division` it evaluates to 0 -- confirm.
color_max = 1 / 255
# Time axis ends at 300 times the median of the pulse distribution.
t_max = 300 * skewnorm.ppf(0.5, a, scale=color_max)

p_paths = CreatePixelPaths(steps)

# Time samples from the 1st percentile of the pulse distribution up to t_max.
t = np.linspace(skewnorm.ppf(0.01, a, scale=color_max), t_max, steps)

# Superimpose 100 pulses, each on a random colour channel at a random offset.
for count in range(1, 101):
    color_index = np.random.randint(0, 3)
    offset = np.random.uniform(low=0, high=t_max)
    pulse = skewnorm.pdf(t, a, loc=offset, scale=color_max)
    # NOTE(review): light 0 is incremented once per light, so light i ends
    # up with (i + 1) copies of the pulse added -- confirm this is intended.
    for i in range(total_lights):
        for j, amplitude in enumerate(pulse):
            first_light_cell = p_paths[0][j]
            first_light_cell[color_index] += amplitude
            p_paths[i][j][color_index] = first_light_cell[color_index]

##end of random_rbg_pulses part##
ShowPaths(p_paths)