示例#1
0
def plss(X, y, cv, n_components=1):
    """Sum squared PLS prediction errors over cross-validation folds.

    For every ``(train, test)`` index pair yielded by ``cv`` the training
    rows of ``X`` and ``y`` are centred on their own column means, a
    ``PLSRegression`` with ``n_components`` components is fitted on the
    centred training data, and the held-out rows (centred with the
    *training* means) are predicted.

    Parameters
    ----------
    X : array whose rows can be selected with the entries of ``cv``.
    y : two-dimensional array (``y.shape[1]`` is read) with matching rows.
    cv : iterable of ``(train, test)`` index pairs.
    n_components : number of PLS components (default 1).

    Returns
    -------
    Array of length ``y.shape[1]`` holding, per column of ``y``, the squared
    prediction error summed over all folds.
    """
    model = PLSRegression(n_components=n_components)
    total_sq_err = np.zeros(y.shape[1])
    for fit_idx, holdout_idx in cv:
        X_fit, X_holdout = X[fit_idx], X[holdout_idx]
        y_fit, y_holdout = y[fit_idx], y[holdout_idx]
        # Centre with statistics of the training fold only.
        y_center = y_fit.mean(0)
        x_center = X_fit.mean(0)
        model.fit(X_fit - x_center, y_fit - y_center)
        residual = y_holdout - y_center - model.predict(X_holdout - x_center)
        total_sq_err += np.sum(residual ** 2, 0)
    return total_sq_err
示例#2
0
def plss(X, y, cv, n_components=1):
    """Return the cross-validated sum of squared errors of a PLS regression.

    ``cv`` is iterated as ``(train, test)`` index pairs. In each fold the
    training rows are mean-centred, a ``PLSRegression`` with
    ``n_components`` components is fitted, and the test rows are predicted
    after being centred with the training means. ``y`` must be
    two-dimensional because ``y.shape[1]`` sizes the accumulator.

    The result is an array with one entry per column of ``y``: the squared
    residuals summed over test rows and over folds.
    """
    regressor = PLSRegression(n_components=n_components)
    sse_per_target = np.zeros(y.shape[1])
    for train_rows, test_rows in cv:
        X_tr, X_te = X[train_rows], X[test_rows]
        y_tr, y_te = y[train_rows], y[test_rows]
        y_mean = y_tr.mean(0)
        X_mean = X_tr.mean(0)
        regressor.fit(X_tr - X_mean, y_tr - y_mean)
        errors = y_te - y_mean - regressor.predict(X_te - X_mean)
        sse_per_target += np.sum(errors ** 2, 0)
    return sse_per_target
示例#3
0
文件: linre23.py 项目: vrepina/linre
def pls_kfold(sample_set, kfold_group_count, max_components, preprocess):
    """Cross-validate PLS predictions for 1..max_components and save them.

    Steps, in order:

    1. Load ``'linre_big' + sample_set + '.npz'`` and shuffle the samples
       with a fixed ``random_state``.
    2. Pass the spectra through ``find_peaks`` and fit a preliminary PLS
       model; samples whose first PLS score is not greater than minus two
       standard deviations of that score are dropped.
    3. If ``preprocess`` is truthy, clip values below 0.5 up to 0.5 and take
       the fourth root.
    4. For each component count from 1 to ``max_components`` run a K-fold
       cross-validation and store the out-of-fold predictions of the first
       response column.
    5. Write ``X``, ``Y``, ``expa`` and the prediction matrix to
       ``'out23/' + preprocess + 'pred.npz'``.

    Parameters
    ----------
    sample_set : str
        Suffix inserted into the input file name.
    kfold_group_count : callable
        Called with the number of samples being cross-validated; returns the
        number of folds.
    max_components : int
        Largest number of PLS components to evaluate.
    preprocess : str
        Its truthiness switches the transform on, and its value is
        concatenated into the output file name, so it has to be a string
        (an empty string disables the transform).

    Returns
    -------
    None. The results are only written to disk.

    NOTE(review): ``KFold(n=, k=, indices=)`` and
    ``PLSRegression(algorithm=)`` look like an old scikit-learn API —
    confirm the pinned version before upgrading.
    """
    print("load...")
    l = AttrDict(load('linre_big' + sample_set + '.npz'))
    disa = l.disa
    expa = l.expa
    Y = disa[:, None]
    X = l.flum.T
    X, Y, expa = shuffle(X, Y, expa, random_state=1)

    print("fix...")
    # X_err is returned by find_peaks but not used further in this function.
    X_err, X = find_peaks(X, l.exa)

    # Outlier screen: keep samples whose first PLS score is above -2 std.
    pls = PLSRegression(scale=False, algorithm='svd')
    pls.fit(X=X, Y=Y)
    PC = pls.transform(X.copy())
    PC1 = PC[:, 0]
    good = PC1 > -PC1.std() * 2
    X, Y, expa = X[good, :], Y[good, :], expa[good]

    if preprocess:
        X[X < 0.5] = 0.5
        X = X ** 0.25
    #save?

    print("cross-validation...")
    # Derive the fold count from the samples that survived the outlier
    # screen. Using the unfiltered count (len(disa)) can request more folds
    # than there are samples, e.g. for a leave-one-out style callable.
    group_count = kfold_group_count(len(Y))
    Ypred4n_components = empty((len(Y), max_components))
    for n_components in arange(max_components) + 1:
        Ypred = empty_like(Y)
        loo = KFold(n=len(Y), k=group_count, indices=False)
        for fit, test in loo:
            pls = PLSRegression(
                scale=False,
                algorithm='svd',
                n_components=n_components
            )
            # Copies are passed to fit()/predict(), as in the original code.
            pls.fit(X=X[fit].copy(), Y=Y[fit].copy())
            Ypred[test] = pls.predict(X[test].copy())
        # Column i holds the predictions obtained with i + 1 components.
        Ypred4n_components[:, n_components - 1] = Ypred[:, 0]
        print("done for " + str(n_components) + " components")

    savez('out23/' + preprocess + 'pred.npz',
        X=X, Y=Y, expa=expa, Ypred4n_components=Ypred4n_components
    )
示例#4
0
 def dict2mean(X, dict):
     """Predict from ``X`` with a PLSRegression rebuilt from stored values.

     ``dict`` must provide the keys ``'coefs'``, ``'x_mean'`` and
     ``'y_mean'``. A fresh ``PLSRegression`` is created (its ``n_components``
     is taken from the first dimension of ``dict['coefs']``), the stored
     values are assigned to ``coefs``, ``x_mean_`` and ``y_mean_``, and the
     result of ``predict(X)`` is returned. No fitting takes place.
     """
     n_components = np.shape(dict['coefs'])[0]
     model = PLSRegression(n_components=n_components)
     # (attribute on the estimator, key in the stored dictionary)
     restored = (('x_mean_', 'x_mean'), ('y_mean_', 'y_mean'), ('coefs', 'coefs'))
     for attr_name, key in restored:
         setattr(model, attr_name, dict[key])
     return model.predict(X)
示例#5
0
文件: linre21.py 项目: vrepina/linre
# Optional preprocessing: raise every value below 0.5 to 0.5, then take the
# fourth root of all entries.
if preprocess:
    X[X<0.5]=0.5
    X = X**0.25

print "fit..."
# Boolean row masks: rows with index >= samples_in_testing_set are used for
# fitting; the remaining (leading) rows form the test set.
a4fit = arange(len(X)) >= samples_in_testing_set
a4test = logical_not(a4fit)
X4fit,  Y4fit,  expa4fit  = X[a4fit ,:], Y[a4fit ,:], expa[a4fit ]
X4test, Y4test, expa4test = X[a4test,:], Y[a4test,:], expa[a4test]

# Fit an unscaled PLS regression on the fitting rows only.
pls = PLSRegression(n_components=n_components,algorithm='svd',scale=False)
pls.fit(X=X4fit,Y=Y4fit)

print "predict..."

# predict() receives a copy of the test rows; presumably so X4test is not
# modified in place — TODO confirm against the scikit-learn version in use.
Y_pred = pls.predict(X4test.copy())

# Only the first response column is compared below.
dis4test = Y4test[:,0]
dis_pred = Y_pred[:,0]
dis_max = max(disa)
#dis_pred = where(dis_pred<dis_max+1,where(dis_pred<-1,-1,dis_pred),dis_max+1)

persons = Persons(expa4test)

#print ia4test, ia4fit, logical_not(a4fit & good_std)
#print expa4test.shape, dis4test.shape, dis_pred.shape, Y.shape, Y4test.shape, Y_pred.shape

# Draw the identity line from 0 to dis_max in green as a reference, let
# Persons.plot add the actual/predicted values, save the figure under the
# out_pre prefix and clear the current axes.
plt.plot([0,dis_max],[0,dis_max],'g-')
persons.plot(plt,dis4test,dis_pred)
plt.savefig(out_pre+"pred.png")
plt.cla()
示例#6
0
# PLS regression with a multivariate response (PLS2): n samples, p
# predictors, q response columns.
n = 1000
q = 3
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
# B has shape (p, q): every response column uses weight 1 on the first
# predictor, weight 2 on the second and 0 on all others.
B = np.array([[1, 2] + [0] * (p - 2)] * q).T
# each Yj = 1*X1 + 2*X2 + noise (plus a constant offset of 5)
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print ("True B (such that: Y = XB + Err)")
print (B)
# compare pls2.coefs with B
print ("Estimated B")
print (np.round(pls2.coefs, 1))
# The return value of predict() is not used here.
pls2.predict(X)

###############################################################################
# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
# y = 1*X1 + 2*X2 + noise + 5, as a one-dimensional array
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
# note that the number of components exceeds 1 (the dimension of y)
print ("Estimated betas")
print (np.round(pls1.coefs, 1))

###############################################################################
示例#7
0
 def dict2mean(X, dict):
     """Run ``predict`` on a PLSRegression populated from a dictionary.

     The keys ``'coefs'``, ``'x_mean'`` and ``'y_mean'`` of ``dict`` are
     copied onto a newly constructed, unfitted ``PLSRegression`` as the
     attributes ``coefs``, ``x_mean_`` and ``y_mean_``. The estimator's
     ``n_components`` is the size of the first dimension of
     ``dict['coefs']``. Returns whatever ``predict(X)`` yields.
     """
     stored_coefs_shape = np.shape(dict['coefs'])
     estimator = PLSRegression(n_components=stored_coefs_shape[0])
     estimator.x_mean_ = dict['x_mean']
     estimator.y_mean_ = dict['y_mean']
     estimator.coefs = dict['coefs']
     prediction = estimator.predict(X)
     return prediction
示例#8
0
# PLS regression with a multivariate response (PLS2): n samples, p
# predictors, q response columns.
n = 1000
q = 3
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
# B has shape (p, q): every response column uses weight 1 on the first
# predictor, weight 2 on the second and 0 on all others.
B = np.array([[1, 2] + [0] * (p - 2)] * q).T
# each Yj = 1*X1 + 2*X2 + noise (plus a constant offset of 5)
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print "True B (such that: Y = XB + Err)"
print B
# compare pls2.coefs with B
print "Estimated B"
print np.round(pls2.coefs, 1)
# The return value of predict() is not used here.
pls2.predict(X)

###############################################################################
# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
# y = 1*X1 + 2*X2 + noise + 5, as a one-dimensional array
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
pls1 = PLSRegression(n_components=3)
pls1.fit(X, y)
# note that the number of components exceeds 1 (the dimension of y)
print "Estimated betas"
print np.round(pls1.coefs, 1)

###############################################################################
示例#9
0
# For each candidate number of PLS components, cross-validate a PLS model
# that predicts X_meg from X_fmri and compare it with a mean-predicting
# DummyRegressor baseline.
# NOTE(review): comp_scores, cv, nfolds, max_comps, seed and band are not
# defined in this fragment — presumably set up earlier in the file.
dumb_scores = []
for ncomp in max_comps:
    print 'Trying %d components' % ncomp
    pls = PLSRegression(n_components=ncomp)
    dumb = DummyRegressor(strategy='mean')

    # Mean absolute errors summed over the folds.
    mae = 0
    dumb_mae = 0
    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]

        pls.fit(X_fmri_train, X_meg_train)
        pred = pls.predict(X_fmri_test)

        mae += mean_absolute_error(X_meg_test, pred)

        # Baseline fitted and scored on exactly the same split.
        dumb.fit(X_fmri_train, X_meg_train)
        dumb_pred = dumb.predict(X_fmri_test)
        dumb_mae += mean_absolute_error(X_meg_test, dumb_pred)

    # Average the summed errors over nfolds.
    comp_scores.append(mae / nfolds)
    dumb_scores.append(dumb_mae / nfolds)

# The Agg backend is selected before pyplot is imported.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Plot PLS and baseline scores against the number of components.
plt.plot(max_comps, comp_scores, max_comps, dumb_scores)
t_str = seed + str(band)
示例#10
0
# Split the dataset produced by DatasetCreator in half, fit a PLS regression
# on the first half, and report the mean squared error and the mean cosine
# similarity of the predictions on the second half.
c = DatasetCreator(dtk_params=params, encoder_params=[4096, 3])

D = c.get_d()

n = len(D[0])

print(D[1])

# Floor division keeps the split point an integer on both Python 2 and
# Python 3; the float produced by `n / 2` on Python 3 is not a valid slice
# bound.
half = n // 2
train_X, train_Y = D[0][:half], D[1][:half]
test_X, test_Y = D[0][half:], D[1][half:]

pls2 = PLSRegression()
pls2.fit(train_X, train_Y)

#print(pls2.coefs)

pred = pls2.predict(test_X)

mean_err = np.mean((pred - test_Y)**2)

print(mean_err)

mean_cos = 0
mean_cos_original = 0

# Accumulate the cosine similarity between each prediction and its target.
for i, j in zip(pred, test_Y):
    mean_cos += np.dot(i, j) / np.sqrt(np.dot(i, i) * np.dot(j, j))

# Average over the pairs that were actually summed (the test half), not over
# the size of the whole dataset.
n_test = len(pred)
print(mean_cos / n_test)
    # NOTE(review): this fragment starts inside an enclosing block whose
    # header is not visible here; pls, dumb, cv, within, X_fmri, X_meg and y
    # are defined outside the fragment.
    # Per-fold mean absolute errors are summed into these accumulators.
    mae = 0
    dumb_mae = 0
    meg_mae, fmri_mae = 0, 0
    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]
        y_train = y[train]
        y_test = y[test]

        # Combined design matrix: fMRI columns followed by MEG columns.
        X_train = np.hstack([X_fmri_train,X_meg_train])
        X_test = np.hstack([X_fmri_test,X_meg_test])
        
        pls.fit(X_train, y_train)
        pred = pls.predict(X_test)

        mae += mean_absolute_error(y_test, pred)

        # Baseline estimator fitted and scored on the same split.
        dumb.fit(X_train, y_train)
        dumb_pred = dumb.predict(X_test)
        dumb_mae += mean_absolute_error(y_test,dumb_pred)

        # Optionally score each modality alone. The same pls object is
        # refitted, so after this branch it holds the MEG-only fit.
        if within:
            pls.fit(X_fmri_train, y_train)
            pred = pls.predict(X_fmri_test)
            fmri_mae += mean_absolute_error(y_test, pred)

            pls.fit(X_meg_train, y_train)
            pred = pls.predict(X_meg_test)
            meg_mae += mean_absolute_error(y_test, pred)
示例#12
0
# For each candidate number of PLS components, cross-validate a PLS model
# that predicts X_meg from X_fmri and compare it with a mean-predicting
# DummyRegressor baseline.
# NOTE(review): comp_scores, cv, nfolds, max_comps, seed and band are not
# defined in this fragment — presumably set up earlier in the file.
dumb_scores = []
for ncomp in max_comps:
    print 'Trying %d components'%ncomp
    pls = PLSRegression(n_components=ncomp)
    dumb = DummyRegressor(strategy='mean')

    # Mean absolute errors summed over the folds.
    mae = 0
    dumb_mae = 0
    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]
        
        pls.fit(X_fmri_train, X_meg_train)
        pred = pls.predict(X_fmri_test)

        mae += mean_absolute_error(X_meg_test, pred)

        # Baseline fitted and scored on exactly the same split.
        dumb.fit(X_fmri_train, X_meg_train)
        dumb_pred = dumb.predict(X_fmri_test)
        dumb_mae += mean_absolute_error(X_meg_test,dumb_pred)

    # Average the summed errors over nfolds.
    comp_scores.append(mae/nfolds)
    dumb_scores.append(dumb_mae/nfolds)

# The Agg backend is selected before pyplot is imported.
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
# Plot PLS and baseline scores against the number of components.
plt.plot(max_comps,comp_scores,max_comps,dumb_scores)
t_str = seed + str(band)
示例#13
0
文件: linre28.py 项目: vrepina/linre
# Fourth-root transform of the predictor matrix.
X = X**0.25

# Cross-validation setup: 11 folds, component counts 9 through 19.
group_count = 11
n_components_list = range(9,20)

for n_components in n_components_list:
    # Out-of-fold predictions for the current component count.
    Ypred = empty_like(Y)
    loo = KFold( n=len(Y), k=group_count, indices=False )
    for fit, test in loo:
        pls = PLSRegression( 
            scale=False, 
            algorithm='svd', 
            n_components=n_components 
        )
        # Copies of the fold data are passed to fit() and predict().
        pls.fit( X=X[fit].copy(), Y=Y[fit].copy() )
        Ypred[test] = pls.predict(X[test].copy())
    # Report the component count and RMSEP of the first response column.
    print n_components, RMSEP(Y[:,0],Ypred[:,0])

#n, bins, patches = plt.hist(X.flatten(),40,range=(0,2))
#plt.show()

"""
stuff:
print [v for v in X.flatten() if v<-0.4] #only 3 numbers from X <-0.4
X = (1-X)**2/X*2 #Kubelka-Munk function
"""
"""-
5 0.407984567727
6 0.354843551016
7 0.340217332243
8 0.328329231934
示例#14
0
def test_predictions():
    """Compare PLSR, PLSC and O2PLS predictions with scikit-learn's PLS.

    All models are fitted on the data returned by ``load_linnerud()`` with
    two components and no scaling, then asked to predict the training data.
    The checks are:

    * ``PLSR`` predictions match ``PLSRegression`` to 5 decimals, and their
      residual sums of squares differ by less than 5e-5;
    * ``PLSC`` and ``PLSCanonical`` residual sums of squares differ by less
      than 5e-5;
    * ``O2PLS`` (components ``[2, 1, 0]``) has a smaller residual sum of
      squares than ``PLSCanonical``.
    """
    linnerud = load_linnerud()
    X, Y = linnerud.data, linnerud.target
    # The scikit-learn estimators are fitted on copies; the residuals are
    # always measured against the copied Y.
    Xorig, Yorig = X.copy(), Y.copy()

    num_comp = 2
    scale = False
    tol = 5e-12
    miter = 1000

    # Keyword sets shared by the scikit-learn and the local estimators.
    sklearn_kwargs = dict(n_components=num_comp, scale=scale,
                          tol=tol, max_iter=miter, copy=True)
    local_kwargs = dict(center=True, scale=scale,
                        tolerance=tol, max_iter=miter)

    def fit_then_predict(model, data_x, data_y):
        """Fit ``model`` and return its prediction for the same inputs."""
        model.fit(data_x, data_y)
        return model.predict(data_x)

    def residual_ss(Yhat):
        """Residual sum of squares of ``Yhat`` against ``Yorig``."""
        return np.sum((Yorig - Yhat) ** 2)

    # Reference: sklearn PLSRegression.
    Yhat1 = fit_then_predict(PLSRegression(**sklearn_kwargs), Xorig, Yorig)
    SSYdiff1 = residual_ss(Yhat1)

    # Compare PLSR and sklearn.PLSRegression
    Yhat3 = fit_then_predict(PLSR(num_comp=num_comp, **local_kwargs), X, Y)
    assert_array_almost_equal(Yhat1, Yhat3, decimal=5,
                              err_msg="PLSR gives wrong prediction")
    SSYdiff3 = residual_ss(Yhat3)
    assert abs(SSYdiff1 - SSYdiff3) < 0.00005

    # Reference: sklearn PLSCanonical.
    Yhat2 = fit_then_predict(PLSCanonical(**sklearn_kwargs), Xorig, Yorig)
    SSYdiff2 = residual_ss(Yhat2)

    # Compare PLSC and sklearn.PLSCanonical
    Yhat4 = fit_then_predict(PLSC(num_comp=num_comp, **local_kwargs), X, Y)
    SSYdiff4 = residual_ss(Yhat4)

    # Compare O2PLS and sklearn.PLSCanonical
    Yhat5 = fit_then_predict(
        O2PLS(num_comp=[num_comp, 1, 0], **local_kwargs), X, Y)
    SSYdiff5 = residual_ss(Yhat5)

    assert abs(SSYdiff2 - SSYdiff4) < 0.00005
    assert SSYdiff2 > SSYdiff5