Example #1
def makeLinearRegression(xObs, yObs, xerr, yerr):
    print(len(xObs), len(yObs), len(xerr), len(yerr))
    delta = np.ones(len(xerr))
    xycov = np.zeros(len(xerr))
    model = linmix.LinMix(xObs, yObs, xerr, yerr, xycov, delta, 2, 2)  # K=2, nchains=2
    model.run_mcmc(5000, 10000, silent=False)
    # return intercept, slope, scatter
    return model.chain['alpha'], model.chain['beta'],\
           np.sqrt(model.chain['sigsqr'])
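A minimal usage sketch of makeLinearRegression (not part of the original example), assuming numpy and linmix are importable; the data arrays below are made-up placeholders:

import numpy as np

# Placeholder observations; replace with real measurements.
xObs = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
yObs = np.array([2.1, 3.9, 6.2, 7.8, 10.1])
xerr = np.full(5, 0.1)
yerr = np.full(5, 0.2)

alpha, beta, sigma = makeLinearRegression(xObs, yObs, xerr, yerr)
print('median intercept:', np.median(alpha))
print('median slope:', np.median(beta))
print('median intrinsic scatter:', np.median(sigma))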
Example #2
def run_linmix(x, y, err_x, err_y, Nmin=5000, Nmax=10000, vb=True):
    # pylint: disable = too-many-arguments
    ''' Runs the Kelly regression algorithm through the linmix package.
        For convenience, here are the linmix arguments:

        Linmix Args:
            x(array_like): The observed independent variable.
            y(array_like): The observed dependent variable.
            xsig(array_like): 1-sigma measurement errors in x.
            ysig(array_like): 1-sigma measurement errors in y.
            xycov(array_like): Covariance between the measurement errors in x
                               and y.
            delta(array_like): Array indicating whether a data point is
                               censored (i.e., not detected), or not.
                               If delta[i] == 1, then the ith source is
                               detected. If delta[i] == 0, then the ith source
                               is not detected and y[i] will be interpreted as
                               an upper limit. Note that if there are censored
                               data points, then the maximum-likelihood
                               estimate (alpha, beta, sigsqr) is not valid. By
                               default, all data points are assumed to be
                               detected.
            K(int): The number of Gaussians to use in the mixture model
                    for the distribution of xi.
            nchains(int): The number of Monte Carlo Markov Chains to
                          instantiate.
    '''

    # Make sure dimensions are correct
    assert np.size(x) == np.size(y)
    assert np.size(err_x) == np.size(err_y)
    assert np.size(x) == np.size(err_x)

    L = np.size(x)

    # FIX: Implement censored data!
    # Run linmix MCMC
    delta = np.ones(L)
    xycov = np.zeros(L)
    model = linmix.LinMix(x, y, err_x, err_y, xycov, delta, 2, 2)
    model.run_mcmc(Nmin, Nmax, silent=vb)

    # return intercept, slope, intrinsic scatter
    intercept = model.chain['alpha']
    slope = model.chain['beta']
    sigma = np.sqrt(model.chain['sigsqr'])

    # Return fit parameters consistently with run_lrgs
    return (intercept, slope, sigma)
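The docstring above documents the delta argument for censored data, but the body hardcodes delta to all ones (see the FIX comment). A hedged sketch of how upper limits could be passed straight to linmix.LinMix, assuming a hypothetical boolean array is_upper_limit (defined elsewhere) marks the censored points:

# Sketch only: delta = 0 marks a censored point (y is an upper limit),
# delta = 1 marks a detection, matching the docstring above.
delta = np.where(is_upper_limit, 0, 1)
model = linmix.LinMix(x, y, err_x, err_y, delta=delta, K=2)
model.run_mcmc(5000, 10000, silent=True)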
Example #3
def run():
    import astropy.io.ascii as ascii
    try:
        a = ascii.read('test.dat')
    except Exception:
        generate_test_data()
        a = ascii.read('test.dat')

    lm = linmix.LinMix(a['x'], a['y'], a['xsig'], a['ysig'], delta=a['delta'])
    lm.run_mcmc()
    ascii.write(
        lm.chain[[
            'alpha', 'beta', 'sigsqr', 'mu0', 'usqr', 'wsqr', 'ximean',
            'xisig', 'corr'
        ]], 'test.pyout')
Example #4
def do_regression(XX, YY, EX, EY, DELTA, label):

    # run the linear mixture regression
    lm = linmix.LinMix(XX, YY, EX, EY, delta=DELTA)
    lm.run_mcmc(miniter=10000)

    # save the chains
    os.system('rm -rf outfiles/' + label + '.regression.npz')
    np.savez('outfiles/' + label + '.regression.npz', chain=lm.chain)

    # inference summaries
    alp = lm.chain['alpha']
    bet = lm.chain['beta']
    sca = np.sqrt(lm.chain['sigsqr'])
    cor = lm.chain['corr']

    CI_levs = [15.865, 84.135]
    CI_alp = np.percentile(alp, CI_levs)
    kde_alp = stats.gaussian_kde(alp)
    ndisc = int(np.round((CI_alp[1] - CI_alp[0]) / 0.01))
    x_alp = np.linspace(CI_alp[0], CI_alp[1], ndisc)
    pk_alp = x_alp[np.argmax(kde_alp.evaluate(x_alp))]

    CI_bet = np.percentile(bet, CI_levs)
    kde_bet = stats.gaussian_kde(bet)
    ndisc = int(np.round((CI_bet[1] - CI_bet[0]) / 0.01))
    x_bet = np.linspace(CI_bet[0], CI_bet[1], ndisc)
    pk_bet = x_bet[np.argmax(kde_bet.evaluate(x_bet))]

    CI_sca = np.percentile(sca, CI_levs)
    kde_sca = stats.gaussian_kde(sca)
    ndisc = int(np.round((CI_sca[1] - CI_sca[0]) / 0.01))
    x_sca = np.linspace(CI_sca[0], CI_sca[1], ndisc)
    pk_sca = x_sca[np.argmax(kde_sca.evaluate(x_sca))]

    # dump these to a text file
    alp_str = 'alpha = %5.2f + %5.2f / - %5.2f' % \
              (pk_alp, CI_alp[1]-pk_alp, pk_alp-CI_alp[0])
    bet_str = 'beta = %5.2f + %5.2f / - %5.2f' % \
              (pk_bet, CI_bet[1]-pk_bet, pk_bet-CI_bet[0])
    sca_str = 'dispersion = %5.2f + %5.2f / - %5.2f' % \
              (pk_sca, CI_sca[1]-pk_sca, pk_sca-CI_sca[0])

    os.system('rm -rf outfiles/' + label + '.regression.txt')
    f = open('outfiles/' + label + '.regression.txt', 'w')
    f.write(label + '\n')
    f.write('\n')
    f.write(alp_str + '\n')
    f.write(bet_str + '\n')
    f.write(sca_str)
    f.close()

    # make a covariance plot
    os.system('rm -rf outfiles/' + label + '.corner.png')
    posts = np.column_stack([alp, bet, sca, cor])
    levs = 1. - np.exp(-0.5 * (np.arange(3) + 1)**2)
    fig = corner.corner(
        posts,
        plot_datapoints=False,
        levels=levs,
        labels=[r'$\alpha$', r'$\beta$', r'$\sigma$', r'$\varrho$'])
    fig.savefig('outfiles/' + label + '.corner.png')
    fig.clf()

    return lm.chain
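The three percentile-plus-KDE-peak blocks above repeat the same pattern; a small helper (not in the original code) could factor it out, using the same numpy and scipy.stats calls as the example:

def summarize(samples, step=0.01):
    # 68% credible interval and KDE peak, mirroring the blocks above
    lo, hi = np.percentile(samples, [15.865, 84.135])
    kde = stats.gaussian_kde(samples)
    grid = np.linspace(lo, hi, int(np.round((hi - lo) / step)))
    peak = grid[np.argmax(kde.evaluate(grid))]
    return peak, hi - peak, peak - lo

# e.g. pk_alp, up_alp, dn_alp = summarize(alp)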
Example #5
x = []
xsig = []
y = []
ysig = []
for i in range(len(x1)):
    if (str(x1[i]) != 'nan' and str(y1[i]) != 'nan'
            and str(x1sig[i]) != 'nan' and str(y1sig[i]) != 'nan'):
        x.append(x1[i])
        y.append(y1[i])
        xsig.append(x1sig[i])
        ysig.append(y1sig[i])
corr_coeff, p_value_pearson = stats.pearsonr(x, y)
tau, p_value_kendall = stats.kendalltau(x, y)
lm = linmix.LinMix(x, y, xsig, ysig, K=2)
lm.run_mcmc(silent=True)
for i in range(0, len(lm.chain), 25):
    xs = np.arange(6, 13)
    ys = lm.chain[i]['alpha'] + xs * lm.chain[i]['beta']
    ax1.plot(xs, ys, color='r', alpha=0.02)
intercept_array = []
slope_array = []
for i in range(0, len(lm.chain), 25):
    intercept = lm.chain[i]['alpha']
    slope = lm.chain[i]['beta']
    intercept_array.append(intercept)
    slope_array.append(slope)
mean_intercept = np.mean(intercept_array)
std_intercept = np.std(intercept_array)
mean_slope = np.mean(slope_array)
Example #6
X = random.sample(range(1, 100), 40)
Y = random.sample(range(1, 100), 40)
# this is a trick to get float errors spanning roughly from 10^-2 up to 10^2
X_err = (1. * np.array(random.sample(range(1, 100), 40))) / (
    1. * np.array(random.sample(range(1, 100), 40)))
Y_err = (1. * np.array(random.sample(range(1, 100), 40))) / (
    1. * np.array(random.sample(range(1, 100), 40)))

# creates array since linmix works with arrays and not lists
Xfa = np.array(X)
Yfa = np.array(Y)
Xfa_err = np.array(X_err)
Yfa_err = np.array(Y_err)
# run a Markov chain Monte Carlo producing at least 5000 iterations (i.e., 5000
# different linear regressions)
lm = linmix.LinMix(Xfa, Yfa, xsig=Xfa_err, ysig=Yfa_err, K=3)
lm.run_mcmc(miniter=5000, maxiter=100000, silent=True)

# print the average parameters and several statistical tests
print('')
print(" Pearson test ", stats.pearsonr(X, Y))
print("Spearman test ", stats.spearmanr(X, Y))
print('')
print('----- Bayesian linear regression with error in both X and Y -----')
print('Alpha = ', lm.chain['alpha'].mean(), '+/-', lm.chain['alpha'].std())
print('Beta = ', lm.chain['beta'].mean(), '+/-', lm.chain['beta'].std())
print('Sigma = ', np.sqrt(lm.chain['sigsqr'].mean()), '+/-',
      np.sqrt(lm.chain['sigsqr'].std()))
print('Variance = ', lm.chain['sigsqr'].mean(), '+/-', lm.chain['sigsqr'].std())
print('Correlation = ', lm.chain['corr'].mean(), '+/-', lm.chain['corr'].std())
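The example above computes the fit but never visualizes it; here is a hedged sketch of the same posterior-line plot used in Example #5, assuming matplotlib is available:

import numpy as np
import matplotlib.pyplot as plt

# Plot the data and a thinned set of posterior regression lines from lm.chain.
fig, ax = plt.subplots()
ax.errorbar(Xfa, Yfa, xerr=Xfa_err, yerr=Yfa_err, ls='none', marker='o')
xs = np.linspace(Xfa.min(), Xfa.max(), 100)
for sample in lm.chain[::25]:
    ax.plot(xs, sample['alpha'] + sample['beta'] * xs, color='r', alpha=0.02)
plt.show()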
Example #7
Y = np.log10(Y)

# +
fig, ax = plt.subplots(figsize=(10, 10))
ax.errorbar(X, Y, xerr=Xe, yerr=Ye, ls=" ", alpha=0.3)
ax.scatter(X, Y, marker=".", s=20 / np.hypot(Xe, Ye))

#ax.set(
#    xlim=[-0.2, 0.8], ylim=[-0.2, 0.8],
#    xlabel=r"$\log_{10}\, \Pi$", ylabel=r"$\log_{10}\, \Lambda$",
#)
#ax.set_aspect("equal")
#sns.despine()
# -

lm = linmix.LinMix(X[:-1], Y[:-1], Xe[:-1], Ye[:-1], K=2)

lm.run_mcmc()

dfchain = pd.DataFrame.from_records(lm.chain.tolist(),
                                    columns=lm.chain.dtype.names)
dfchain

dfchain.describe()

pearsonr(X, Y)

pd.DataFrame({"X": X, "Xe": Xe, "Y": Y, "Ye": Ye}).describe()

# +
vmin, vmax = -3.0, 1.0
Example #8
    w10 = (y < 10) & (ysig != 0)
    y[w10] = 10
    delta = np.ones((len(x), ),
                    dtype=int)  # should really be bool, but ints are easier
    delta[w10] = 0

    out = Table([x, y, xsig, ysig, delta],
                names=['x', 'y', 'xsig', 'ysig', 'delta'])
    import astropy.io.ascii as ascii
    ascii.write(out, 'test.dat')


# def run():
import astropy.io.ascii as ascii
try:
    a = ascii.read('test.dat')
except Exception:
    generate_test_data()
    a = ascii.read('test.dat')

lm = linmix.LinMix(a['x'], a['y'], a['xsig'], a['ysig'], delta=a['delta'])
lm.run_mcmc()
ascii.write(
    lm.chain[[
        'alpha', 'beta', 'sigsqr', 'mu0', 'usqr', 'wsqr', 'ximean', 'xisig',
        'corr'
    ]], 'test.pyout')

# if __name__ == '__main__':
#     run()
Example #9
    lmr_chain = N.load('%s_MBH_stellarmass.npy' % linmixfilebase)
    lmr_alpha = lmr_chain['alpha']
    lmr_beta = lmr_chain['beta']

    lmupl_chain = N.load('%s_MBH_bulge_withlimits.npy' % linmixfilebase)
    lmupl_alpha = lmupl_chain['alpha']
    lmupl_beta = lmupl_chain['beta']

    lmnupl_chain = N.load('%s_MBH_bulge_withoutlimits.npy' % linmixfilebase)
    lmnupl_alpha = lmnupl_chain['alpha']
    lmnupl_beta = lmnupl_chain['beta']

else:

    # Use linmix to fit the Haring & Rix x and y values
    lmhr = linmix.LinMix(xhr, yhr, xhrerr, yhrerr, K=2)
    lmhr.run_mcmc(silent=False, maxiter=5000)
    lmhr_alpha = lmhr.chain['alpha']
    lmhr_beta = lmhr.chain['beta']

    N.save('linmix_results_haringrixfit.npy', lmhr.chain)

    # Use linmix to fit the stellar mass vs. MBH relation from the DISKDOM sample
    lmr = linmix.LinMix(results[mstarcol],
                        results[mbhcol],
                        results[dmstarcol],
                        results[dmbhcol],
                        K=2)
    lmr.run_mcmc(silent=False, maxiter=5000)
    lmr_alpha = lmr.chain['alpha']
    lmr_beta = lmr.chain['beta']