Example #1
File: linre23.py  Project: vrepina/linre
def pls_kfold( sample_set, kfold_group_count, max_components, preprocess ):
    print "load...";
    l = AttrDict(load('linre_big'+sample_set+'.npz'))
    disa = l.disa
    expa = l.expa
    Y = disa[:,None]
    X = l.flum.T
    X, Y, expa = shuffle(X, Y, expa, random_state=1)
    print "fix...";
    X_err, X = find_peaks(X,l.exa)
    pls = PLSRegression( scale=False, algorithm='svd' )
    pls.fit(X=X,Y=Y)
    PC = pls.transform(X.copy())
    PC1 = PC[:,0]
    good = PC1 > -PC1.std()*2   # flag samples more than 2 SD below zero on the first PLS score as outliers
    X, Y, expa = X[good,:], Y[good,:], expa[good]
    if preprocess:
        X[X<0.5]=0.5   # clip low values, then compress with a fourth-root transform
        X = X**0.25
    #save?
    print "cross-validation...";
    group_count = kfold_group_count(len(disa))
    Ypred4n_components = empty((len(Y),max_components))
    for n_components in arange(max_components)+1:
        Ypred = empty_like(Y)
        loo = KFold( n=len(Y), k=group_count, indices=False )
        for fit, test in loo:
            pls = PLSRegression( 
                scale=False, 
                algorithm='svd', 
                n_components=n_components 
            )
            pls.fit( X=X[fit].copy(), Y=Y[fit].copy() )
            Ypred[test] = pls.predict(X[test].copy())
        Ypred4n_components[:,n_components-1] = Ypred[:,0]
        print "done for "+str(n_components)+" components"
    savez('out23/'+preprocess+'pred.npz',
        X=X, Y=Y, expa=expa, Ypred4n_components=Ypred4n_components
    )
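The snippet above targets an older scikit-learn interface (PLSRegression with an algorithm argument, and KFold taking n/k/indices). As a minimal sketch only, the same K-fold PLS cross-validation loop written against the current sklearn.cross_decomposition / sklearn.model_selection API could look like this, with synthetic stand-in data in place of the linre archive:

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold

# Synthetic stand-ins for the spectra matrix X and the response Y.
rng = np.random.default_rng(1)
X = rng.normal(size=(60, 200))
Y = rng.normal(size=(60, 1))

max_components = 5
Ypred4n_components = np.empty((len(Y), max_components))

for n_components in range(1, max_components + 1):
    Ypred = np.empty_like(Y)
    for fit_idx, test_idx in KFold(n_splits=5).split(X):
        pls = PLSRegression(n_components=n_components, scale=False)
        pls.fit(X[fit_idx], Y[fit_idx])
        Ypred[test_idx] = pls.predict(X[test_idx])
    Ypred4n_components[:, n_components - 1] = Ypred[:, 0]   # CV predictions per component count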
Example #2
File: linre21.py  Project: vrepina/linre
expa = l['expa']

Y = disa[:,None]
X = flum.T

X, Y, expa = shuffle(X, Y, expa, random_state=1)

print "fix peaks..."

X_err, X = find_peaks(X,exa)

print "fix outliers..."

pls = PLSRegression( scale=False, algorithm='svd' )
pls.fit(X=X,Y=Y)
PC = pls.transform(X.copy())
PC1, PC2 = PC[:,0], PC[:,1]
good = PC1 > -PC1.std()*2
plot_scores(fn='_bad_1', expa=expa, x=PC1,y=PC2, xl='T1',yl='T2', title=', bad')
print expa[logical_not(good)]
X, Y, expa = X[good,:], Y[good,:], expa[good]

print "preprocess with power..."

if preprocess:
    X[X<0.5]=0.5
    X = X**0.25

print "fit..."
a4fit = arange(len(X)) >= samples_in_testing_set   # first samples_in_testing_set rows form the test set; the rest are used for fitting
a4test = logical_not(a4fit)
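
For reference, the outlier screen used above (keeping samples whose first PLS x-score lies no more than two standard deviations below zero) can also be written against the current PLSRegression API. This is a minimal sketch with synthetic data, omitting the project-specific plot_scores and expa handling:

import numpy as np
from sklearn.cross_decomposition import PLSRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 100))
Y = rng.normal(size=(50, 1))

pls = PLSRegression(n_components=2, scale=False).fit(X, Y)
PC1 = pls.transform(X)[:, 0]        # first x-score for every sample
good = PC1 > -2 * PC1.std()         # keep samples within 2 SD below zero
X, Y = X[good], Y[good]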
Example #3
        pca = RandomizedPCA(n_components=ncomp, whiten=True)   # RandomizedPCA is the older scikit-learn class, superseded by PCA(svd_solver='randomized')

        clf = LinearRegression().fit(pca.fit_transform(X_fmri_train), y_train)
        mse_fmri.append(mean_squared_error(clf.predict(pca.transform(X_fmri_test)), y_test))

        clf = LinearRegression().fit(pca.fit_transform(X_meg_train), y_train)
        mse_meg.append(mean_squared_error(clf.predict(pca.transform(X_meg_test)), y_test))

        both_train = np.hstack([X_meg_train, X_fmri_train])
        both_test = np.hstack([X_meg_test, X_fmri_test])

        clf = LinearRegression().fit(pca.fit_transform(both_train), y_train)
        mse_pca.append(mean_squared_error(clf.predict(pca.transform(both_test)), y_test))

        plsca.fit(X_meg_train, X_fmri_train)
        X_mc_train, X_fc_train = plsca.transform(X_meg_train, X_fmri_train)
        X_mc_test, X_fc_test = plsca.transform(X_meg_test, X_fmri_test)
        clf = LinearRegression().fit(X_mc_train, y_train)   # regression on the MEG-side canonical scores
        mse_plsm.append(mean_squared_error(clf.predict(X_mc_test), y_test))
        mse_plsf.append(mean_squared_error(clf.predict(X_fc_test), y_test))   # the same model applied to the fMRI-side scores, which share the latent space

        # dumb.fit(X_fmri_train, X_meg_train)
        # dumb_pred = dumb.predict(X_fmri_test)
        # dumb_mae += mean_absolute_error(X_meg_test,dumb_pred)

    yf.append(np.sqrt(np.mean(mse_fmri)))   # RMSE across folds, per method
    ym.append(np.sqrt(np.mean(mse_meg)))
    ypca.append(np.sqrt(np.mean(mse_pca)))
    yplsm.append(np.sqrt(np.mean(mse_plsm)))
    yplsf.append(np.sqrt(np.mean(mse_plsf)))
    # dumb_scores.append(dumb_mae/nfolds)
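
The loop above evaluates PCA-plus-LinearRegression baselines per modality and then regresses on canonical components from a previously fitted plsca object (presumably sklearn.cross_decomposition.PLSCanonical trained on the MEG and fMRI views). A minimal self-contained sketch of that PLSCanonical step, with random stand-ins for the MEG/fMRI matrices and target:

import numpy as np
from sklearn.cross_decomposition import PLSCanonical
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

rng = np.random.default_rng(0)
X_meg, X_fmri = rng.normal(size=(80, 30)), rng.normal(size=(80, 40))
y = rng.normal(size=80)
train, test = slice(0, 60), slice(60, 80)

# Learn a shared latent space from the two training views, then project both splits.
plsca = PLSCanonical(n_components=2)
plsca.fit(X_meg[train], X_fmri[train])
X_mc_train, X_fc_train = plsca.transform(X_meg[train], X_fmri[train])
X_mc_test, X_fc_test = plsca.transform(X_meg[test], X_fmri[test])

# Regress the target on the MEG-side canonical components, as in the loop above.
clf = LinearRegression().fit(X_mc_train, y[train])
print(mean_squared_error(y[test], clf.predict(X_mc_test)))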