Example #1
File: pls.py Project: kieferkat/kk-tools
def pls_train(self, X, Y, verbose=True):

    Xn = simple_normalize(X)

    pls = PLSRegression()

    if verbose:
        print('fitting canonical pls...')

    pls.fit(Xn, Y)

    return pls
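For reference, PLSRegression now lives in sklearn.cross_decomposition; the old sklearn.pls path used in some examples below was removed long ago. A self-contained sketch of the same train-and-return pattern, with StandardScaler standing in for the project-specific simple_normalize:

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.preprocessing import StandardScaler

def pls_train(X, Y, n_components=2, verbose=True):
    # Column-wise standardization stands in for the project's simple_normalize.
    Xn = StandardScaler().fit_transform(X)
    pls = PLSRegression(n_components=n_components)
    if verbose:
        print('fitting canonical pls...')
    pls.fit(Xn, Y)
    return pls

# Toy usage: 100 samples, 5 features, one response.
rng = np.random.RandomState(0)
X = rng.normal(size=(100, 5))
Y = X[:, 0] - X[:, 1] + rng.normal(scale=0.1, size=100)
model = pls_train(X, Y)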
Example #2
def plss(X, y, cv, n_components=1):
    """
    """
    pls = PLSRegression(n_components=n_components)
    sse = np.zeros(y.shape[1])
    for train, test in cv:
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        y0 = y_train.mean(0)
        X0 = X_train.mean(0)
        pls.fit(X_train - X0, y_train - y0)
        sse += np.sum((y_test - y0 - pls.predict(X_test - X0)) ** 2, 0)
    return sse
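plss iterates cv directly, which matches the pre-0.18 scikit-learn cross-validation objects; with the current sklearn.model_selection API you materialize the splits first. A usage sketch with synthetic data:

import numpy as np
from sklearn.model_selection import KFold
from sklearn.cross_decomposition import PLSRegression

rng = np.random.RandomState(0)
X = rng.normal(size=(60, 8))
y = X[:, :2] @ np.array([[1.0, 0.5], [2.0, -1.0]]) + rng.normal(scale=0.1, size=(60, 2))

# Materialize (train, test) index pairs so plss can iterate them directly.
cv = list(KFold(n_splits=5, shuffle=True, random_state=0).split(X))
print(plss(X, y, cv, n_components=2))  # one SSE value per response column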
Example #4
File: linre23.py Project: vrepina/linre
def pls_kfold(sample_set, kfold_group_count, max_components, preprocess):
    print("load...")
    l = AttrDict(load('linre_big' + sample_set + '.npz'))
    disa = l.disa
    expa = l.expa
    Y = disa[:, None]
    X = l.flum.T
    X, Y, expa = shuffle(X, Y, expa, random_state=1)
    print("fix...")
    X_err, X = find_peaks(X, l.exa)
    pls = PLSRegression(scale=False, algorithm='svd')
    pls.fit(X=X, Y=Y)
    PC = pls.transform(X.copy())
    PC1 = PC[:, 0]
    good = PC1 > -PC1.std() * 2
    X, Y, expa = X[good, :], Y[good, :], expa[good]
    if preprocess:
        X[X < 0.5] = 0.5
        X = X**0.25
    #save?
    print("cross-validation...")
    group_count = kfold_group_count(len(disa))
    Ypred4n_components = empty((len(Y), max_components))
    for n_components in arange(max_components) + 1:
        Ypred = empty_like(Y)
        loo = KFold(n=len(Y), k=group_count, indices=False)
        for fit, test in loo:
            pls = PLSRegression(
                scale=False,
                algorithm='svd',
                n_components=n_components
            )
            pls.fit(X=X[fit].copy(), Y=Y[fit].copy())
            Ypred[test] = pls.predict(X[test].copy())
        Ypred4n_components[:, n_components - 1] = Ypred[:, 0]
        print("done for %d components" % n_components)
    savez('out23/' + preprocess + 'pred.npz',
          X=X, Y=Y, expa=expa, Ypred4n_components=Ypred4n_components)
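KFold(n=..., k=..., indices=False) and algorithm='svd' are long-gone APIs: KFold now takes n_splits and yields index arrays from .split(), and PLSRegression only offers the default NIPALS solver. A sketch of the inner component-selection loop on the current API, with toy arrays standing in for the linre data:

import numpy as np
from sklearn.model_selection import KFold
from sklearn.cross_decomposition import PLSRegression

rng = np.random.RandomState(1)
X = rng.normal(size=(55, 30))
Y = 2.0 * X[:, :1] + rng.normal(scale=0.1, size=(55, 1))

max_components = 5
Ypred4n_components = np.empty((len(Y), max_components))
for n_components in range(1, max_components + 1):
    Ypred = np.empty_like(Y)
    for fit_idx, test_idx in KFold(n_splits=5).split(X):
        pls = PLSRegression(n_components=n_components, scale=False)
        pls.fit(X[fit_idx], Y[fit_idx])
        # Out-of-fold predictions fill the rows held out in this split.
        Ypred[test_idx] = pls.predict(X[test_idx])
    Ypred4n_components[:, n_components - 1] = Ypred[:, 0]
    print("done for %d components" % n_components)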
Example #5
pl.xticks(())
pl.yticks(())
pl.show()

###############################################################################
# PLS regression, with multivariate response, a.k.a. PLS2

n = 1000
q = 3
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
B = np.array([[1, 2] + [0] * (p - 2)] * q).T
# each Yj = 1*X1 + 2*X2 + noise
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print ("True B (such that: Y = XB + Err)")
print (B)
# compare pls2.coefs with B
print ("Estimated B")
print (np.round(pls2.coefs, 1))
pls2.predict(X)

###############################################################################
# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
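The PLS2 block above compares pls2.coefs with B; in current scikit-learn the fitted coefficient matrix is exposed as coef_ instead, and in recent releases its shape is (n_targets, n_features) rather than (n_features, n_targets). A self-contained sketch of the same comparison, assuming a recent release:

import numpy as np
from sklearn.cross_decomposition import PLSRegression

n, q, p = 1000, 3, 10
X = np.random.normal(size=(n, p))
B = np.array([[1, 2] + [0] * (p - 2)] * q).T
Y = X @ B + np.random.normal(size=(n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
# coef_ replaces the old coefs attribute; the transpose matches B's (p, q) layout.
print("Estimated B")
print(np.round(pls2.coef_.T, 1))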
Example #6
def dict2mean(X, coef_dict):
    # Rebuild a fitted PLSRegression from stored means and coefficients,
    # then use it only for prediction.
    plsca = PLSRegression(n_components=np.shape(coef_dict['coefs'])[0])
    plsca.x_mean_ = coef_dict['x_mean']
    plsca.y_mean_ = coef_dict['y_mean']
    plsca.coefs = coef_dict['coefs']
    return plsca.predict(X)
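dict2mean leans on old PLSRegression internals, where predict computed (X - x_mean_) @ coefs + y_mean_ (ignoring any x_std_ scaling, which this dict does not store). The same prediction can be taken straight from the stored arrays, with no dependence on estimator internals; a sketch assuming the same dict layout:

import numpy as np

def dict2mean(X, coef_dict):
    # Center X, apply the stored coefficient matrix, add back the Y mean.
    X = np.asarray(X)
    return (X - coef_dict['x_mean']) @ coef_dict['coefs'] + coef_dict['y_mean']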
Example #7
    np.corrcoef(Y_test_r[:, 0], Y_test_r[:, 1])[0, 1])
pl.legend()
pl.show()

###############################################################################
# PLS regression, with multivariate response, a.k.a. PLS2

n = 1000
q = 3
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
B = np.array([[1, 2] + [0] * (p - 2)] * q).T
# each Yj = 1*X1 + 2*X2 + noise
Y = np.dot(X, B) + np.random.normal(size=n * q).reshape((n, q)) + 5

pls2 = PLSRegression(n_components=3)
pls2.fit(X, Y)
print "True B (such that: Y = XB + Err)"
print B
# compare pls2.coefs with B
print "Estimated B"
print np.round(pls2.coefs, 1)
pls2.predict(X)

###############################################################################
# PLS regression, with univariate response, a.k.a. PLS1

n = 1000
p = 10
X = np.random.normal(size=n * p).reshape((n, p))
y = X[:, 0] + 2 * X[:, 1] + np.random.normal(size=n * 1) + 5
Example #8
yf = []
ym = []
ypca = []
yplsm = []
yplsf = []
X_fmri = scale(np.concatenate(good_data['fmri'], axis=1))
X_meg = scale(np.concatenate(good_data['meg'], axis=1))
for ncomp in max_comps:
    mse_fmri = []
    mse_meg = []
    mse_pca = []
    mse_plsm = []
    mse_plsf = []

    print('Trying %d components' % ncomp)
    plsca = PLSRegression(n_components=ncomp)
    dumb = DummyRegressor(strategy='mean')

    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]
        y_train = sx.iloc[train].tolist()
        y_test = sx.iloc[test].tolist()

        pca = RandomizedPCA(n_components=ncomp, whiten=True)

        clf = LinearRegression().fit(pca.fit_transform(X_fmri_train), y_train)
        mse_fmri.append(mean_squared_error(clf.predict(pca.transform(X_fmri_test)), y_test))
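The snippet is cut off before the MEG and PLS branches, and RandomizedPCA has since been removed from scikit-learn; PCA with svd_solver='randomized' is the replacement. A sketch of the PCA-baseline fold on the current API, with small synthetic arrays standing in for the fMRI data:

import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

rng = np.random.RandomState(0)
X_train, X_test = rng.normal(size=(80, 200)), rng.normal(size=(20, 200))
y_train, y_test = rng.normal(size=80), rng.normal(size=20)

# Randomized PCA for dimensionality reduction, then an ordinary least-squares fit.
pca = PCA(n_components=10, svd_solver='randomized', whiten=True)
clf = LinearRegression().fit(pca.fit_transform(X_train), y_train)
mse = mean_squared_error(y_test, clf.predict(pca.transform(X_test)))
print(mse)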
Example #9
from sklearn.cross_validation import ShuffleSplit
from sklearn.pls import PLSRegression
from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor

nobs = X_meg.shape[0]
max_comps = range(2, 30, 2)
nfolds = 50
cv = ShuffleSplit(nobs, n_iter=nfolds, test_size=.1)

# Trying the prediction with different components
comp_scores = []
dumb_scores = []
for ncomp in max_comps:
    print('Trying %d components' % ncomp)
    pls = PLSRegression(n_components=ncomp)
    dumb = DummyRegressor(strategy='mean')

    mae = 0
    dumb_mae = 0
    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]

        pls.fit(X_fmri_train, X_meg_train)
        pred = pls.predict(X_fmri_test)

        mae += mean_absolute_error(X_meg_test, pred)
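Both import paths in this example are gone: ShuffleSplit moved to sklearn.model_selection with a changed constructor (n_splits instead of n_iter, splits coming from .split()), and PLSRegression moved to sklearn.cross_decomposition. The same MAE-per-component loop on the current API, sketched with toy arrays in place of X_fmri and X_meg:

import numpy as np
from sklearn.model_selection import ShuffleSplit
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_absolute_error

rng = np.random.RandomState(0)
X_fmri = rng.normal(size=(100, 50))
X_meg = X_fmri[:, :20] + rng.normal(scale=0.5, size=(100, 20))

cv = ShuffleSplit(n_splits=10, test_size=0.1, random_state=0)
for ncomp in range(2, 10, 2):
    pls = PLSRegression(n_components=ncomp)
    mae = 0.0
    for train, test in cv.split(X_fmri):
        # Predict the MEG features from the fMRI features.
        pls.fit(X_fmri[train], X_meg[train])
        mae += mean_absolute_error(X_meg[test], pls.predict(X_fmri[test]))
    print('%d components: MAE %.3f' % (ncomp, mae / cv.get_n_splits()))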
Example #10
from dataset_creator import DatasetCreator

params = {"LAMBDA": 0.4, "dimension": 4096}

c = DatasetCreator(dtk_params=params, encoder_params=[4096, 3])

D = c.get_d()

n = len(D[0])

print(D[1])

train_X, train_Y = D[0][:n // 2], D[1][:n // 2]
test_X, test_Y = D[0][n // 2:], D[1][n // 2:]

pls2 = PLSRegression()
pls2.fit(train_X, train_Y)

#print(pls2.coefs)

pred = pls2.predict(test_X)

mean_err = np.mean((pred - test_Y)**2)

print(mean_err)

mean_cos = 0
mean_cos_original = 0

for i, j in zip(pred, test_Y):
    mean_cos = mean_cos + np.dot(i, j) / np.sqrt(np.dot(i, i) * np.dot(j, j))
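The loop above accumulates row-wise cosine similarities one pair at a time (and the snippet cuts off before the sums are divided by the sample count). The same mean cosine can be computed in one vectorized step; a sketch assuming pred and test_Y are 2-D arrays of equal shape:

import numpy as np

def mean_cosine(pred, target):
    # Row-wise cosine similarity, averaged over samples.
    num = np.sum(pred * target, axis=1)
    den = np.linalg.norm(pred, axis=1) * np.linalg.norm(target, axis=1)
    return np.mean(num / den)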
Example #11
from sklearn.metrics import mean_absolute_error
from sklearn.dummy import DummyRegressor

nobs = X_meg.shape[0]
max_comps = range(5, 30, 5)
nfolds = 50
cv = ShuffleSplit(nobs, n_iter=nfolds, test_size=.1)
y = inatt

# Trying the prediction with different components
comp_scores = []
dumb_scores = []
meg_scores, fmri_scores = [], []
for ncomp in max_comps:
    print('Trying %d components' % ncomp)
    pls = PLSRegression(n_components=ncomp)
    dumb = DummyRegressor(strategy='mean')

    mae = 0
    dumb_mae = 0
    meg_mae, fmri_mae = 0, 0
    for oidx, (train, test) in enumerate(cv):
        X_fmri_train = X_fmri[train]
        X_fmri_test = X_fmri[test]
        X_meg_train = X_meg[train]
        X_meg_test = X_meg[test]
        y_train = y[train]
        y_test = y[test]

        X_train = np.hstack([X_fmri_train, X_meg_train])
        X_test = np.hstack([X_fmri_test, X_meg_test])
Example #12
File: linre28.py Project: vrepina/linre
mds = AttrDict(load("out23/pred.npz"))
X, Y, expa = mds.X, mds.Y, mds.expa

X[X < 0.5] = 0.5  # 0.7
X = X**0.25

group_count = 11
n_components_list = range(9, 20)

for n_components in n_components_list:
    Ypred = empty_like(Y)
    loo = KFold(n=len(Y), k=group_count, indices=False)
    for fit, test in loo:
        pls = PLSRegression(
            scale=False,
            algorithm='svd',
            n_components=n_components
        )
        pls.fit(X=X[fit].copy(), Y=Y[fit].copy())
        Ypred[test] = pls.predict(X[test].copy())
    print(n_components, RMSEP(Y[:, 0], Ypred[:, 0]))

#n, bins, patches = plt.hist(X.flatten(),40,range=(0,2))
#plt.show()

"""
stuff:
print [v for v in X.flatten() if v<-0.4] #only 3 numbers from X <-0.4
X = (1-X)**2/X*2 #Kubelka-Munk function
"""
"""-
Example #13
def test_predictions():

    d = load_linnerud()
    X = d.data
    Y = d.target
    tol = 5e-12
    miter = 1000
    num_comp = 2
    Xorig = X.copy()
    Yorig = Y.copy()
#    SSY = np.sum(Yorig**2)
#    center = True
    scale = False

    pls1 = PLSRegression(n_components=num_comp, scale=scale,
                         tol=tol, max_iter=miter, copy=True)
    pls1.fit(Xorig, Yorig)
    Yhat1 = pls1.predict(Xorig)

    SSYdiff1 = np.sum((Yorig - Yhat1)**2)
#    print("PLSRegression: R2Yhat = %.4f" % (1 - (SSYdiff1 / SSY)))

    # Compare PLSR and sklearn.PLSRegression
    pls3 = PLSR(num_comp=num_comp, center=True, scale=scale,
                tolerance=tol, max_iter=miter)
    pls3.fit(X, Y)
    Yhat3 = pls3.predict(X)

    assert_array_almost_equal(Yhat1, Yhat3, decimal=5,
                              err_msg="PLSR gives wrong prediction")

    SSYdiff3 = np.sum((Yorig - Yhat3)**2)
#    print("PLSR         : R2Yhat = %.4f" % (1 - (SSYdiff3 / SSY)))

    assert abs(SSYdiff1 - SSYdiff3) < 0.00005

    pls2 = PLSCanonical(n_components=num_comp, scale=scale,
                        tol=tol, max_iter=miter, copy=True)
    pls2.fit(Xorig, Yorig)
    Yhat2 = pls2.predict(Xorig)

    SSYdiff2 = np.sum((Yorig - Yhat2)**2)
#    print("PLSCanonical : R2Yhat = %.4f" % (1 - (SSYdiff2 / SSY)))

    # Compare PLSC and sklearn.PLSCanonical
    pls4 = PLSC(num_comp=num_comp, center=True, scale=scale,
                tolerance=tol, max_iter=miter)
    pls4.fit(X, Y)
    Yhat4 = pls4.predict(X)

    SSYdiff4 = np.sum((Yorig - Yhat4)**2)
#    print("PLSC         : R2Yhat = %.4f" % (1 - (SSYdiff4 / SSY)))

    # Compare O2PLS and sklearn.PLSCanonical
    pls5 = O2PLS(num_comp=[num_comp, 1, 0], center=True, scale=scale,
                 tolerance=tol, max_iter=miter)
    pls5.fit(X, Y)
    Yhat5 = pls5.predict(X)

    SSYdiff5 = np.sum((Yorig - Yhat5)**2)
#    print("O2PLS        : R2Yhat = %.4f" % (1 - (SSYdiff5 / SSY)))

    assert abs(SSYdiff2 - SSYdiff4) < 0.00005
    assert SSYdiff2 > SSYdiff5
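To sanity-check just the scikit-learn half of this test, the R2Yhat quantity from the commented prints can be computed directly; a sketch on the same Linnerud data, keeping the test's uncentered SSY convention:

import numpy as np
from sklearn.datasets import load_linnerud
from sklearn.cross_decomposition import PLSRegression

d = load_linnerud()
X, Y = d.data, d.target
pls = PLSRegression(n_components=2, scale=False)
Yhat = pls.fit(X, Y).predict(X)
SSY = np.sum(Y ** 2)  # uncentered total sum of squares, as in the test's comments
print("R2Yhat = %.4f" % (1 - np.sum((Y - Yhat) ** 2) / SSY))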
Example #14
def pls(coords, intens):
    print(PLSRegression().fit(coords, intens))
Example #15
File: linre21.py Project: vrepina/linre
disa = l['disa']
exa = l['exa']
expa = l['expa']

Y = disa[:, None]
X = flum.T

X, Y, expa = shuffle(X, Y, expa, random_state=1)

print("fix peaks...")

X_err, X = find_peaks(X, exa)

print("fix outliers...")

pls = PLSRegression(scale=False, algorithm='svd')
pls.fit(X=X, Y=Y)
PC = pls.transform(X.copy())
PC1, PC2 = PC[:, 0], PC[:, 1]
good = PC1 > -PC1.std() * 2
plot_scores(fn='_bad_1', expa=expa, x=PC1, y=PC2, xl='T1', yl='T2', title=', bad')
print(expa[logical_not(good)])
X, Y, expa = X[good, :], Y[good, :], expa[good]

print("preprocess with power...")

if preprocess:
    X[X < 0.5] = 0.5
    X = X**0.25

print("fit...")
Example #17
	next(reader)

	# Keep only complete numeric rows mentioning 'evd'; drop rows with NaNs.
	data = np.array(list(filter(lambda row: '' not in row and 'NA' not in row and '?' not in row,
			[[convert(row[i]) for i in good_cols] for row in reader if 'evd' in row]))).astype(float)
	data = data[~np.isnan(data).any(axis=1)]
	loc = data[:, 9]
	tumor = data[:, 4]
	#data = (data - data.min(axis=0)) / (data.max(axis=0) - data.min(axis=0))
	y = data[:, 7]
	x = np.delete(data, 7, 1)

# Min-max scale each feature to [0, 1].
x = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))

print(x.shape)

pls1 = PLSRegression(n_components=x.shape[1])
pls1.fit(x, y)

# log of a negative coefficient is NaN; nan_to_num maps those entries to 0.
cfs = np.nan_to_num(np.log(pls1.coefs))

fig, (coeffs, dist) = plt.subplots(nrows=1, ncols=2)
coeffs.barh(range(len(cfs)), cfs, edgecolor='k',
		color=['r' if c < 0 else 'g' for c in cfs],
		linewidth=1)
artist.adjust_spines(coeffs)
coeffs.axvline(x=0, color='k', linestyle='--', linewidth=2)
coeffs.axvline(x=-5.3, color='r', linestyle='--', linewidth=1)
coeffs.axvline(x=5.3, color='r', linestyle='--', linewidth=1)
coeffs.set_xlabel(r'\Large \textbf{Importance} $\left(\log \beta\right)$')
coeffs.set_yticks(range(len(labels)))