示例#1
0
def _indvdl_t(hparams, std_x, n_samples, L_cov, verbose=0):
    """Build two correlated Student-t individual-effect variables.

    Two independent ``StudentT`` vectors (``u1s``, ``u2s``) are mixed with
    entries of the Cholesky factor of ``L_cov`` and multiplied by per-variable
    scales to give the deterministic variables ``mu1s_`` and ``mu2s_``.

    Parameters
    ----------
    hparams : object
        Must expose ``df_indvdl`` (degrees of freedom) and
        ``dist_scale_indvdl`` (passed on to ``_dist_from_str``).
    std_x : sequence
        ``std_x[0]`` and ``std_x[1]`` multiply the two scale variables.
    n_samples : int
        Length of each Student-t vector.
    L_cov : matrix
        Input to ``cholesky``; only its 2x2 leading entries are read.
    verbose : int, optional
        Values of 10 or more print the dtypes of ``u1s`` and ``u2s``.

    Returns
    -------
    tuple
        ``(mu1s_, mu2s_)``.
    """
    dof = hparams.df_indvdl
    scale_prior = hparams.dist_scale_indvdl

    # The two scale variables are created first, in this order.
    raw_scale1 = std_x[0] * _dist_from_str('scale_mu1s', scale_prior)
    raw_scale2 = std_x[1] * _dist_from_str('scale_mu2s', scale_prior)

    # A unit Student-t with nu degrees of freedom has variance nu / (nu - 2);
    # dividing by its square root normalises the draws to unit variance.
    scale1 = raw_scale1 / np.sqrt(dof / (dof - 2))
    scale2 = raw_scale2 / np.sqrt(dof / (dof - 2))

    t_kwargs = dict(nu=np.float32(dof), shape=(n_samples, ), dtype=floatX)
    u1s = StudentT('u1s', **t_kwargs)
    u2s = StudentT('u2s', **t_kwargs)

    chol = cholesky(L_cov).astype(floatX)

    # NOTE(review): both expressions read chol[1, 0]; for a plain matrix-vector
    # product with a triangular factor one of them would be expected to use
    # chol[0, 1]. Which one depends on whether `cholesky` returns the lower or
    # the upper factor -- confirm before changing.
    mixed1 = chol[0, 0] * u1s * scale1 + chol[1, 0] * u2s * scale1
    mixed2 = chol[1, 0] * u1s * scale2 + chol[1, 1] * u2s * scale2
    mu1s_ = Deterministic('mu1s_', mixed1)
    mu2s_ = Deterministic('mu2s_', mixed2)

    if verbose >= 10:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
示例#2
0
def _indvdl_t(
    hparams, std_x, n_samples, L_cov, StudentT, Deterministic, floatX, 
    cholesky, tt, verbose):
    df_L = hparams['df_indvdl']
    scale1 = np.float32(std_x[0] * hparams['v_indvdl_1'] / 
                        np.sqrt(df_L / (df_L - 2)))
    scale2 = np.float32(std_x[1] * hparams['v_indvdl_2'] / 
                        np.sqrt(df_L / (df_L - 2)))

    u1s = StudentT('u1s', nu=np.float32(df_L), shape=(n_samples,), 
                   dtype=floatX)
    u2s = StudentT('u2s', nu=np.float32(df_L), shape=(n_samples,), 
                   dtype=floatX)

    L_cov_ = cholesky(L_cov).astype(floatX)
    tt.set_subtensor(L_cov_[0, :], L_cov_[0, :] * scale1, inplace=True)
    tt.set_subtensor(L_cov_[1, :], L_cov_[1, :] * scale2, inplace=True)
    mu1s_ = Deterministic('mu1s_', 
                          L_cov_[0, 0] * u1s + L_cov_[0, 1] * u2s)
    mu2s_ = Deterministic('mu2s_', 
                          L_cov_[1, 0] * u1s + L_cov_[1, 1] * u2s)

    if 10 <= verbose:
        print('StudentT for individual effect')
        print('u1s.dtype = {}'.format(u1s.dtype))
        print('u2s.dtype = {}'.format(u2s.dtype))

    return mu1s_, mu2s_
示例#3
0
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """
    Fit a Bayesian alpha-beta regression with a Student-T likelihood.

    ``data`` and ``bmark`` are concatenated column-wise and rows containing
    missing values are dropped. Column 0 of the result is the observed
    series and column 1 is the single regressor; the model is
    ``returns ~ StudentT(nu_minus_two + 2, alpha + beta * X, sigma)``.

    Parameters
    ----------
    data : pandas.Series
        Returns to explain (becomes column 0 after concatenation).
    bmark : pandas.Series or pandas.DataFrame
        Benchmark returns. Only the column that lands at position 1 after
        concatenation is used as regressor.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    # Align both inputs on their index and keep only complete rows.
    aligned = pd.concat([data, bmark], axis='columns').dropna()
    observed_returns = aligned.iloc[:, 0]
    regressor = aligned.iloc[:, 1]

    with pm.Model() as model:
        noise_scale = pm.HalfCauchy('sigma', beta=1)
        # Shifted by 2 in the likelihood so the degrees of freedom stay > 2.
        dof_minus_two = pm.Exponential('nu_minus_two', 1. / 10.)

        intercept = pm.Normal('alpha', mu=0, sd=.1)
        slope = pm.Normal('beta', mu=0, sd=1)

        expected = intercept + slope * regressor
        StudentT('returns',
                 nu=dof_minus_two + 2,
                 mu=expected,
                 sd=noise_scale,
                 observed=observed_returns)
        trace = pm.sample(samples)

    return model, trace
示例#4
0
def model_stoch_vol(data, samples=2000):
    """
    Fit a stochastic volatility model with a two-stage NUTS run.

    The latent series ``s`` follows a Gaussian random walk and the
    observations are Student-T distributed with ``lam = exp(-2 * s)``.
    Sampling starts from a MAP estimate of ``s``, runs a short 100-draw
    stage, and then restarts NUTS from the last point of that stage.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw in the second stage.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        Trace of the second sampling stage.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    from pymc3.distributions.timeseries import GaussianRandomWalk

    with pm.Model() as model:
        dof = pm.Exponential('nu', 1. / 10, testval=5.)
        walk_sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        latent = GaussianRandomWalk('s', walk_sigma**-2, shape=len(data))
        precision_path = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * latent))
        StudentT('r', dof, lam=precision_path, observed=data)

        # Stage 1: MAP for the latent walk only, then a short NUTS run.
        map_point = pm.find_MAP(vars=[latent],
                                fmin=sp.optimize.fmin_l_bfgs_b)
        warmup_step = pm.NUTS(scaling=map_point)
        warmup_trace = pm.sample(100, warmup_step, progressbar=False)

        # Stage 2: re-scale and start at the last sampled position.
        main_step = pm.NUTS(scaling=warmup_trace[-1], gamma=.25)
        trace = pm.sample(samples,
                          main_step,
                          start=warmup_trace[-1],
                          progressbar=False)

    return model, trace
示例#5
0
def model_returns_t(data, samples=500):
    """
    Fit a Student-T model to a return series.

    Besides location and scale, the likelihood has a degrees-of-freedom
    parameter (``nu_minus_two + 2``) that controls the mass in the tails.
    Two derived quantities, ``'annual volatility'`` and ``'sharpe'``, are
    recorded using a factor of ``sqrt(252)``.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    with pm.Model() as model:
        # Priors, initialised at the sample moments of the data.
        location = pm.Normal('mean returns', mu=0, sd=.01,
                             testval=data.mean())
        scale = pm.HalfCauchy('volatility', beta=1, testval=data.std())
        dof_minus_two = pm.Exponential('nu_minus_two', 1. / 10., testval=3.)

        likelihood = StudentT('returns',
                              nu=dof_minus_two + 2,
                              mu=location,
                              sd=scale,
                              observed=data)
        dist = likelihood.distribution

        pm.Deterministic('annual volatility',
                         dist.variance**.5 * np.sqrt(252))
        pm.Deterministic('sharpe',
                         dist.mean / dist.variance**.5 * np.sqrt(252))

        # Start NUTS from the MAP point found with Powell's method.
        map_point = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        nuts = pm.NUTS(scaling=map_point)
        trace = pm.sample(samples, nuts, start=map_point)
    return model, trace
示例#6
0
def model_stoch_vol(data, samples=2000):
    """
    Fit a stochastic volatility model with default sampling.

    The latent series ``s`` follows a Gaussian random walk and the
    observations are Student-T distributed with ``lam = exp(-2 * s)``.

    Parameters
    ----------
    data : pandas.Series
        Return series to model.
    samples : int, optional
        Posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.

    See Also
    --------
    plot_stoch_vol : plotting of stochastic volatility model
    """

    from pymc3.distributions.timeseries import GaussianRandomWalk

    n_obs = len(data)

    with pm.Model() as model:
        dof = pm.Exponential('nu', 1. / 10, testval=5.)
        walk_sigma = pm.Exponential('sigma', 1. / .02, testval=.1)
        latent = GaussianRandomWalk('s', walk_sigma**-2, shape=n_obs)
        precision_path = pm.Deterministic('volatility_process',
                                          pm.math.exp(-2 * latent))
        StudentT('r', dof, lam=precision_path, observed=data)

        # Step method and starting point are left to pm.sample.
        trace = pm.sample(samples)

    return model, trace
示例#7
0
def model_best(y1, y2, samples=1000):
    """
    Bayesian Estimation Supersedes the T-Test (BEST).

    Fits one Student-T likelihood per group with a shared degrees-of-freedom
    parameter, and records the difference of means, the difference of
    standard deviations, an effect size, and per-group annual volatility
    and Sharpe ratio (using a factor of ``sqrt(252)``).

    This model replicates the example used in:
    Kruschke, John. (2012) Bayesian estimation supersedes the t
    test. Journal of Experimental Psychology: General.

    Parameters
    ----------
    y1 : array-like
        Array of returns (e.g. in-sample). Must provide ``.mean()`` and
        ``.std()``.
    y2 : array-like
        Array of returns (e.g. out-of-sample). Must provide ``.mean()`` and
        ``.std()``.
    samples : int, optional
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """

    pooled = np.concatenate((y1, y2))
    pooled_std = np.std(pooled)

    # Very wide priors centred on the pooled statistics.
    prior_mean = np.mean(pooled)
    prior_precision = 0.000001 * 1 / pooled_std**2
    std_lower = pooled_std / 1000
    std_upper = pooled_std * 1000

    annualize = np.sqrt(252)

    with pm.Model() as model:
        group1_mean = pm.Normal('group1_mean', mu=prior_mean,
                                tau=prior_precision, testval=y1.mean())
        group2_mean = pm.Normal('group2_mean', mu=prior_mean,
                                tau=prior_precision, testval=y2.mean())
        group1_std = pm.Uniform('group1_std', lower=std_lower,
                                upper=std_upper, testval=y1.std())
        group2_std = pm.Uniform('group2_std', lower=std_lower,
                                upper=std_upper, testval=y2.std())
        # Both likelihoods share the same (shifted) degrees of freedom.
        dof = pm.Exponential('nu_minus_two', 1 / 29., testval=4.) + 2.

        returns_group1 = StudentT('group1', nu=dof, mu=group1_mean,
                                  lam=group1_std**-2, observed=y1)
        returns_group2 = StudentT('group2', nu=dof, mu=group2_mean,
                                  lam=group2_std**-2, observed=y2)

        mean_gap = pm.Deterministic('difference of means',
                                    group2_mean - group1_mean)
        pm.Deterministic('difference of stds', group2_std - group1_std)
        pooled_variance = (group1_std**2 + group2_std**2) / 2
        pm.Deterministic('effect size',
                         mean_gap / pm.math.sqrt(pooled_variance))

        # Same creation order as before: both volatilities, then both
        # Sharpe ratios.
        for label, rv in (('group1_annual_volatility', returns_group1),
                          ('group2_annual_volatility', returns_group2)):
            pm.Deterministic(label, rv.distribution.variance**.5 * annualize)

        for label, rv in (('group1_sharpe', returns_group1),
                          ('group2_sharpe', returns_group2)):
            pm.Deterministic(
                label,
                rv.distribution.mean /
                rv.distribution.variance**.5 * annualize)

        trace = pm.sample(samples)
    return model, trace
# Robust linear cost model on the log scale:
#   mu = log(MEASURE) = ALPHA + BETA * log(X)
# NOTE(review): X_INPUT / Y_OUTPUT are presumably already log-transformed
# (the prediction below is mapped back with exp) -- confirm against the
# code that builds them.
#######################################

with Model() as cost_model:
    # Priors for unknown cost model parameters:
    # one intercept, one slope per entry of ATT, and a noise scale.
    ALPHA = Normal('ALPHA', mu=0, sigma=1000)
    BETA = Normal('BETA', mu=0, sigma=1000, shape=len(ATT))
    SIGMA = HalfNormal('SIGMA', sigma=100)

    # Model: linear predictor, plus the degrees of freedom of the
    # likelihood exposed under the name 'NU'.
    MU = ALPHA + dot(X_INPUT, BETA)
    NU = Deterministic('NU', Exponential('nu_', 1 / 29))

    # Likelihood (sampling distribution) of observations.
    # Normal alternative to the Student-T likelihood, kept for reference:
    #     Y_OBS = Normal('Y_OBS', mu=mu, sigma=sigma, observed=Y_OUTPUT)
    Y_OBS = StudentT('Y_OBS', mu=MU, sigma=SIGMA, observed=Y_OUTPUT, nu=NU)

# Draw posterior samples and plot the trace.
with cost_model:
    TRACE = sample(SAMPLES, tune=TUNE, cores=6)
    traceplot(TRACE)

# Posterior predictive check: average the predictive draws of 'Y_OBS' over
# axis 0 and map the result back with exp.
with cost_model:
    Y_PRED = sample_posterior_predictive(TRACE, 1000, cost_model)
    Y_ = Y_PRED['Y_OBS'].mean(axis=0)
    PP['model_cost'] = exp(Y_)  # depends on input/output
    SUMMARY = df_summary(TRACE)

# Persist the model together with its trace.
with open('Time_and_Material_cost_model.pkl', 'wb') as f:
    dump({'model': cost_model, 'TRACE': TRACE}, f)

PROMPTS['F_BASENAME'] = F_BASENAME
示例#9
0
# Define the stochastic volatility model.
# \sig ~ exp(50)
#       why? stdev of returns is approx 0.02
#       stdev of exp(lam=50) = 1/50 = 0.02
# \nu ~ exp(0.1)
#       the degrees of freedom of the Student-T likelihood
#       mean of exp(lam=0.1) = 10
# s_i ~ normal(s_i-1, \sig^-2)
# log(y_i) ~ studentT(\nu, 0, exp(-2s_i))
# NOTE(review): the code below passes lam = 1 / exp(-2 * s), i.e. the
# reciprocal of what the line above states -- confirm which is intended.
with Model() as sp500_model:
    nu = Exponential('nu', 1. / 10,
                     testval=5.)  # a rate of 1/50 gave similar results
    sigma = Exponential('sigma', 1. / .02, testval=.1)
    s = GaussianRandomWalk('s', sigma**-2, shape=len(returns))
    volatility_process = Deterministic('volatility_process', exp(-2 * s))
    r = StudentT('r', nu, lam=1 / volatility_process, observed=returns)

# Fit the model; no step method is passed, so sample() picks one itself.
# You may get a warning like:
#   WARNING (theano.gof.compilelock): Overriding existing lock by dead process '10876' (I am process '3456')
# Ignore it; the process will move along.
with sp500_model:
    trace = sample(2000, progressbar=False)
# Plot results from model fitting, skipping the first 200 draws.
# NOTE(review): presumably the first 200 draws are discarded as burn-in --
# confirm.
traceplot(trace[200:], [nu, sigma])

# Plot the observed returns; the inferred volatility is presumably drawn on
# the same axes by code that follows.
fig, ax = plt.subplots()  # optionally: figsize=(15, 8)
returns.plot(ax=ax)
示例#10
0
def model_returns_t_alpha_beta(data, bmark, samples=2000):
    """Run Bayesian alpha-beta-model with T distributed returns.

    Regresses ``data`` on one or more benchmark columns with a Student-T
    likelihood. The intercept (``alpha``) and slopes (``beta``, one per
    benchmark column) are initialised from an ordinary least-squares fit,
    and NUTS is started from the MAP point.

    Parameters
    ----------
    data : pandas.Series
        Series of simple returns of an algorithm or stock. If its length
        differs from ``bmark``'s, it is re-indexed to ``bmark.index``.
    bmark : pandas.Series or pandas.DataFrame
        Benchmark returns (e.g., S&P500) or risk factors (e.g.,
        Fama-French SMB, HML, and UMD). A one-dimensional input is wrapped
        in a DataFrame.
    samples : int (optional)
        Number of posterior samples to draw.

    Returns
    -------
    model : pymc.Model object
        PyMC3 model containing all random variables.
    trace : pymc3.sampling.BaseTrace object
        A PyMC3 trace object that contains samples for each parameter
        of the posterior.
    """
    if data.shape[0] != bmark.shape[0]:
        data = pd.Series(data, index=bmark.index)

    observed_only = data.dropna()

    if bmark.ndim == 1:
        bmark = pd.DataFrame(bmark)

    # Keep only the benchmark rows for which a return was observed.
    bmark = bmark.loc[observed_only.index]
    n_bmark = bmark.shape[1]

    with pm.Model() as model:
        noise_scale = pm.HalfCauchy(
            'sigma',
            beta=1,
            testval=observed_only.values.std())
        dof_minus_two = pm.Exponential('nu_minus_two', 1. / 10., testval=.3)

        # Least-squares starting values: slopes first, intercept last
        # (the constant column 'ones' is appended as the final column).
        design = bmark.loc[observed_only.index]
        design.loc[:, 'ones'] = 1.
        ols_coef = np.linalg.lstsq(design, observed_only)[0]

        intercept = pm.Normal('alpha', mu=0, sd=.1, testval=ols_coef[-1])
        slopes = pm.Normal('beta', mu=0, sd=1,
                           testval=ols_coef[:-1], shape=n_bmark)
        regressors = tt.as_tensor_variable(bmark.values.T)
        expected = intercept + tt.dot(slopes, regressors)
        # NOTE(review): `observed` is the full `data` (NaNs not dropped)
        # while `expected` is built from the NaN-dropped rows -- confirm the
        # shapes agree when `data` has missing values.
        StudentT('returns',
                 nu=dof_minus_two + 2,
                 mu=expected,
                 sd=noise_scale,
                 observed=data)
        map_point = pm.find_MAP(fmin=sp.optimize.fmin_powell)
        nuts = pm.NUTS(scaling=map_point)
        trace = pm.sample(samples, nuts, start=map_point, njobs=1)

    return model, trace
示例#11
0
文件: GHME_2013.py 项目: zaczou/pymc3
def interpolate(x0, y0, x, group):
    """Linearly interpolate per-group knot values at the points ``x``.

    For each query point the two neighbouring knots in ``x0`` are located
    with ``np.searchsorted`` and the matching rows of ``y0`` (column chosen
    by ``group``) are blended with weights proportional to the distance to
    the opposite knot.

    Parameters
    ----------
    x0 : numpy.ndarray
        Sorted knot positions.
    y0 : 2-D array-like
        Knot values, indexed as ``y0[knot, group]``.
    x : array-like
        Query positions. Points below ``x0[0]`` wrap around to the last
        knot (index ``-1``) and points above ``x0[-1]`` index past the end
        of ``x0``, so callers should keep ``x`` within the knot range.
    group : array-like of int
        Column of ``y0`` to use for each query point.

    Returns
    -------
    Interpolated value for each entry of ``x``.
    """
    query = np.array(x)
    columns = np.array(group)

    right = np.searchsorted(x0, query)
    left = right - 1

    gap_left = np.array(query - x0[left])
    gap_right = np.array(x0[right] - query)
    # Weight of the left knot grows as the point moves away from the right.
    left_weight = gap_right / (gap_left + gap_right)

    left_part = left_weight * y0[left, columns]
    right_part = (1 - left_weight) * y0[right, columns]
    return left_part + right_part


# Random-walk model over knots: each country has a latent curve defined at
# the knots, interpolated at the observed ages, with Normal observation noise.
with Model() as model:
    # Step size of the random walk.
    # NOTE(review): the positional arguments are presumably (nu, mu, lam) --
    # confirm against the installed PyMC3 version.
    coeff_sd = StudentT('coeff_sd', 10, 1, 5**-2)

    # One random walk per country, evaluated at every knot.
    y = GaussianRandomWalk('y', sd=coeff_sd, shape=(nknots, ncountries))

    # Expected value at each observation's age, for its own group.
    p = interpolate(knots, y, age, group)

    # Observation noise scale (same argument convention as coeff_sd).
    sd = StudentT('sd', 10, 2, 5**-2)

    vals = Normal('vals', p, sd=sd, observed=rate)

# <markdowncell>

# Model Fitting
# -------------

# <codecell>