Пример #1
0
    def test_clean_ambiguities(self):
        """Check 'clean_ambiguities' against the stored example fixture.

        Runs ``PredLog.clean_ambiguities`` on the module-level example inputs
        (``EG_LEGMAT``, ``EG_SCORES_RAW``, ``EG_Y``) and requires the result
        to equal ``EG_FIXED_Y`` element-wise.  The original note states that
        this also covers 'remove_dualities'; presumably that method is invoked
        internally by 'clean_ambiguities' -- TODO confirm.
        """
        predictor = pred_vec.PredLog()
        cleaned = predictor.clean_ambiguities(EG_LEGMAT, EG_SCORES_RAW, EG_Y)
        elementwise_match = EG_FIXED_Y == cleaned
        self.assertTrue(elementwise_match.all())
def compare_train_CV_logistic(Xtr, ytr, Xcv, ycv, degree, description,
                              C=1e5, scale=False, filestump='Cf_log_01'):
    """Fit a logistic model on the training set; report train and CV metrics.

    A ``pred_vec.PredLog`` is built from ``(Xtr, ytr)`` with setting ``C`` and
    fitted.  Its predictions on its own ``X`` are handed to ``print_output``
    under the stage label 'train'.  The same fitted object then has its ``X``
    replaced by ``Xcv``, predicts again, and is reported under the label 'CV'.

    Parameters:
        Xtr, ytr: training features and labels.
        Xcv, ycv: cross-validation features and labels.
        degree: only forwarded to ``print_output``; no polynomial expansion
            happens here (presumably the caller expands the features
            beforehand -- TODO confirm).
        description: forwarded to ``print_output``.
        C: passed to ``PredLog`` and forwarded to ``print_output``.
        scale: accepted for interface compatibility but not used.
        filestump: forwarded to ``print_output``.

    Returns:
        None.
    """
    model = pred_vec.PredLog(X=Xtr, y=ytr, C=C)
    model.fit()

    # Stage 1: evaluate on the data the model was fitted to.
    model.binary_predict(model.X)
    print_output(model, ytr, description, 'train', filestump, degree, C)

    # Stage 2: swap in the cross-validation features and evaluate again
    # with the already-fitted model.
    model.X = Xcv
    model.binary_predict(model.X)
    print_output(model, ycv, description, 'CV', filestump, degree, C)
def test_metrics(Xtest,
                 ytest,
                 degree,
                 description,
                 C=1e5,
                 scale=False,
                 filestump='Cf_log_01'):
    """Fit a logistic model on the supplied data and report its metrics.

    Builds a ``pred_vec.PredLog`` from ``(Xtest, ytest)`` with setting ``C``,
    fits it, predicts on the model's own ``X`` and passes the result to
    ``print_output`` under the stage label 'test'.

    The previous body read ``Xtr`` / ``ytr``, which are neither parameters
    nor locals of this function, so it either raised ``NameError`` or
    silently used unrelated module-level data while ignoring its arguments.
    It also reported under the copy-pasted label 'train'.

    Parameters:
        Xtest, ytest: features and labels to fit on and evaluate against.
        degree: only forwarded to ``print_output``.
        description: forwarded to ``print_output``.
        C: passed to ``PredLog`` and forwarded to ``print_output``.
        scale: accepted for interface compatibility but not used.
        filestump: forwarded to ``print_output``.

    Returns:
        None.
    """
    # NOTE(review): the model is fitted on the same data it is scored on;
    # confirm this is intended rather than scoring a pre-trained model.
    a1 = pred_vec.PredLog(X=Xtest, y=ytest, C=C)
    a1.fit()
    a1.binary_predict(a1.X)
    print_output(a1, ytest, description, 'test', filestump, degree, C)
Пример #4
0
    def test_binary_predict(self):
        """A lightweight check on the 'binary_predict' method.

        Model parameters (``theta``, ``bias``, ``classes``) are assigned
        directly from module-level fixtures instead of being fitted, and the
        prediction on ``X`` is compared with the stored expected indices.
        Assumes that arithmetic is close enough across systems to generate
        identical predictions.  A more thorough test would require fitting of
        data and is probably unnecessary.
        """
        model = pred_vec.PredLog(C=1e5)
        model.theta = theta
        model.bias = bias
        model.classes = classes
        model.binary_predict(X)

        # Row indices predicted as class 1 must equal the stored expectation.
        positive_rows = np.nonzero(model.y_pred == 1)[0]
        positives_match = (positive_rows == y_pred_posindices_expected).all()
        self.assertTrue(positives_match)

        # The prediction vector is one-dimensional and identical to the
        # logistic prediction attribute.
        self.assertTrue(model.y_pred.ndim == 1)
        self.assertTrue((model.y_pred == model.y_pred_logistic).all())
def compare_train_CV_logistic(Xtr, ytr, Xcv, ycv, degree, description,
                              C=1e5, scale=False):
    """Expand features, fit on the training set, report train and CV metrics.

    Both feature arrays are first passed through ``create_array_poly`` with
    ``degree``.  A ``pred_vec.PredLog`` is fitted on the expanded training
    data and evaluated on it, then its ``X`` is replaced by the expanded
    cross-validation data and it is evaluated again.  Each evaluation is
    handed to ``print_output`` with a suffix appended to ``description``.

    Parameters:
        Xtr, ytr: training features and labels.
        Xcv, ycv: cross-validation features and labels.
        degree: passed to ``create_array_poly`` for both feature arrays.
        description: prefix of the heading given to ``print_output``.
        C: passed to ``PredLog``.
        scale: when true, ``fscale()`` is called on the model before fitting
            and once more after the cross-validation features are swapped in.

    Returns:
        None.
    """
    train_features = create_array_poly(Xtr, degree)
    cv_features = create_array_poly(Xcv, degree)

    model = pred_vec.PredLog(X=train_features, y=ytr, C=C)
    if scale:
        model.fscale()
    model.fit()
    model.binary_predict(model.X)
    print_output(model, ytr, description + ' Training metrics')

    # Re-use the fitted model on the cross-validation features.
    # NOTE(review): fscale() is called again here, presumably rescaling with
    # the CV data's own statistics rather than the training ones -- confirm.
    model.X = cv_features
    if scale:
        model.fscale()
    model.binary_predict(model.X)
    print_output(model, ycv, description + ' Cross-Validation metrics')
Пример #6
0
import numpy as np
import os
from vec_hsqc import pred_vec

# Script: fit PredLog on the composite data set stored next to this file and
# print score range, standard binary measures and the non-regularised cost.

# Directory containing this script; the data files are resolved against it.
curdir = os.path.dirname(os.path.abspath(__file__))

# Placeholder inputs, left disabled.
#X = np.zeros( (500,5) )
#y = np.zeros( 500 )

# Both files are read as plain text via np.loadtxt despite the '.npy' suffix.
X = np.loadtxt(os.path.join(curdir, '140225_composite_X.npy'))
y = np.loadtxt(os.path.join(curdir, '140225_composite_Y.npy'))

# C is not passed here, so PredLog's own default applies.
a1 = pred_vec.PredLog(X=X, y=y)

a1.fit()

# Predict on the same data the model was fitted to.
a1.binary_predict(a1.X)

print 'Unscaled, full feature set\n\n'

print 'scores min =', np.min(a1.scores)

print 'scores max =', np.max(a1.scores)

# Private helper called with verbose=True; presumably prints the measures
# itself -- TODO confirm.
a1._standard_measures_binary(y, a1.y_pred, verbose=True)

print '\n\nNonregularized cost function =', a1._cost_function_nonreg(
    y, a1.scores_logistic), '\n\n'

# Visual separator closing this report section.
print '\n', '-' * 30, '\n\n'
    a1.fit()
    a1.binary_predict(a1.X)
    print_output(a1, ytr, description, 'train', filestump, degree, C)


#Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) )
#Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) )

#X_train = np.abs( np.hstack( [ X_train[:,:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) )
#X_CV = np.abs( np.hstack( [ X_CV[:,:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) )

#X = create_array_poly( X_test, 2)

# Entry point: apply stored model parameters (THETA, BIAS) to the test set,
# post-process the predictions and write peak lists next to this script.
if __name__ == '__main__':

    # Degree-2 expansion of the test features.
    X_test = create_array_poly(X_test, 2)
    a1 = pred_vec.PredLog(X=X_test)
    a1.fscale()
    # No fit() call: parameters are assigned directly from module constants.
    a1.theta = THETA
    a1.bias = BIAS
    a1.binary_predict(a1.X)
    # Replace the raw prediction with the ambiguity-cleaned one.
    a1.y_pred = a1.clean_ambiguities(legmat, a1.scores_logistic,
                                     a1.y_pred_logistic)
    # The trailing arguments (file stump 'crap01', degree 2, C 1000) are
    # fixed literals here.
    print_output(a1, y_test, 'test set', 'test', 'crap01', 2, 1000)
    a2 = post_proc.DataOut()
    a2.generate_master_peak_list(a1.y_pred, legmat, csarray_test)
    # fmt="%s" writes every element of the master array as a string.
    np.savetxt(os.path.join(curdir, 'master_array_new.npy'),
               a2.master_array,
               fmt="%s")
    a2.writeall_peak_lists(a2.master_array, curdir, 'test_set_prediction_new_')
# Script: keep the first 500000 rows of X, y and legmat, save them, then
# scale, train and predict with PredLog and save the results.
# NOTE(review): `mat` and `c_` are presumably numpy's, imported elsewhere in
# the file -- confirm.
X = mat(c_[X[:500000, :]])
y = c_[y[:500000]]
legmat = legmat[:500000, :]

# Persist the truncated arrays; legmat is written with a string format.
np.savetxt(os.path.join(curdir, 'first500k_X.npy'), X)
np.savetxt(os.path.join(curdir, 'first500k_y.npy'), y)
np.savetxt(os.path.join(curdir, 'first500k_legmat.npy'), legmat, fmt="%s")

# Disabled reload of the files written above.
#X = np.loadtxt( 'first500k_X.npy' )
#y = np.loadtxt( 'first500k_y.npy' )
#legmat = np.loadtxt( 'first500k_legmat.npy', dtype = 'str' )

print X.shape, y.shape, legmat.shape

#y = np.loadtxt( 'pred_eg_01_Y' )

# Prepend a column of ones to X.
X = mat(c_[np.hstack([np.reshape(np.ones(X.shape[0]), (X.shape[0], 1)), X])])

print X.shape

# NOTE(review): train_classifier / make_prediction / pred_Y / pred differ
# from the fit / binary_predict / y_pred names used by other PredLog callers;
# presumably this targets another version of the class -- confirm.
a = pred_vec.PredLog(X, y)
a.fscale()
a.train_classifier()
a.make_prediction()

# Save the model's X as it stands after fscale().
np.savetxt(os.path.join(curdir, 'scaled_x_500k'), a.X)

np.savetxt(os.path.join(curdir, 'first500k_predicted_Y'), a.pred_Y)
np.savetxt(os.path.join(curdir, 'first500k_prediction'), a.pred)
Пример #9
0
	a1.binary_predict( a1.X )
	print_output( a1, ycv, description, 'CV', filestump, degree, C  )


#Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) )
#Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) )

# Drop the second-to-last feature column (keep all but the last two columns
# plus the last one) and take absolute values of what remains.
X_train = np.abs(np.hstack([X_train[:, :-2],
                            X_train[:, -1].reshape(X_train.shape[0], 1)]))
X_CV = np.abs(np.hstack([X_CV[:, :-2],
                         X_CV[:, -1].reshape(X_CV.shape[0], 1)]))

# Entry point: sweep polynomial degree and C on the abridged feature set.
# Indentation is normalised to four spaces; the previous mix of tabs and
# spaces only parsed under Python 2's tab-equals-8-columns rule and is a
# TabError under Python 3.  Nesting is unchanged.
if __name__ == '__main__':
    # NOTE(review): Xtr_abr / Xcv_abr are not assigned anywhere in the
    # visible code before this loop -- confirm they are defined elsewhere.
    for i in range(1, 3):
        # NOTE(review): the arrays are rebound on every pass, so the degree-2
        # expansion is applied to the output of the degree-1 pass -- confirm
        # this is intended.
        Xtr_abr = create_array_poly(Xtr_abr, i)
        Xcv_abr = create_array_poly(Xcv_abr, i)

        a1 = pred_vec.PredLog(X=Xtr_abr)
        a1.fscale()

        a2 = pred_vec.PredLog(X=Xcv_abr)
        a2.fscale()

        for Cval in (1e15, 1e12, 1e10, 1e8, 1e6, 1e5, 1e3, 1e2, 5e1, 1.25e1,
                     2.5e0, 1e0, 2.5e-1, 5e-1, 1e-2, 1e-3):
            compare_train_CV_logistic(
                a1.X, y_train, a2.X, y_CV, i,
                'Scaled abridged feature set, degree = ' + str(i) +
                ',  C = ' + str(Cval),
                C=Cval, scale=True, filestump='absval_log_new_01')

    for i in range(1, 3):
        X_train = create_array_poly(X_train, i)
        X_CV = create_array_poly(X_CV, i)
        #Xtr_abr = create_array_poly( Xtr_abr, degree )
        #Xcv_abr = create_array_poly( Xcv_abr, degree )
Пример #10
0
import numpy as np
import os
from vec_hsqc import pred_vec

# Script: fit PredLog (C=1e5) on the composite data set stored next to this
# file and compute the standard binary measures on the fitted data.

# Directory containing this script; the data files are resolved against it.
curdir = os.path.dirname(os.path.abspath(__file__))

# Placeholder inputs, left disabled.
#X = np.zeros( (500,5) )
#y = np.zeros( 500 )

# Both files are read as plain text via np.loadtxt despite the '.npy' suffix.
X = np.loadtxt(os.path.join(curdir, '140225_composite_X.npy'))
# The label file name was missing, so os.path.join returned the directory
# itself and loadtxt could not read it.  The matching label file for the
# composite feature set is used.
y = np.loadtxt(os.path.join(curdir, '140225_composite_Y.npy'))

a1 = pred_vec.PredLog(X, y=y, C=1e5)

a1.fit()

# Predict on the same data the model was fitted to.
a1.binary_predict(a1.X)

a1._standard_measures_binary(a1.y, a1.y_pred)
Пример #11
0
import numpy as np
import os
from vec_hsqc import pred_vec

# Script: score the query example with hard-coded model parameters (no
# fitting) and print the range of the resulting scores.

# Directory containing this script; the data files are resolved against it.
curdir = os.path.dirname(os.path.abspath(__file__))

# Placeholder inputs, left disabled.
#X = np.zeros( (500,5) )
#y = np.zeros( 500 )

X = np.loadtxt(os.path.join(curdir, 'query_eg_01_X'))
# y is loaded but not used by the statements visible in this script.
y = np.loadtxt(os.path.join(curdir, 'query_eg_01_Y'))

a1 = pred_vec.PredLog()

# Fitting is disabled; parameters are assigned by hand below.
#a1.fit()

# A single row of four coefficients.
a1.theta = np.array(
    [[-4.50881918e+01, -2.94574614e-02, -1.67172965e-02, 4.22452074e-05]])

a1.bias = np.array([1.56534954])

a1.classes = np.array([0., 1.])

a1.binary_predict(X)

print 'scores min =', np.min(a1.scores)

print 'scores max =', np.max(a1.scores)

# Disabled dump of the raw scores.
#np.savetxt( os.path.join( curdir, 'scores.npy' ), a1.scores )
Пример #12
0
#Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) )
#Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) )

#X_train = np.abs( np.hstack( [ X_train[:,:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) )
#X_CV = np.abs( np.hstack( [ X_CV[:,:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) )

if __name__ == '__main__':

    for i in range(1, 3):
        X_train = create_array_poly(X_train, i)
        X_CV = create_array_poly(X_CV, i)
        #Xtr_abr = create_array_poly( Xtr_abr, degree )
        #Xcv_abr = create_array_poly( Xcv_abr, degree )

        a1 = pred_vec.PredLog(X=X_train)
        a1.fscale()

        a2 = pred_vec.PredLog(X=X_CV)
        a2.fscale()

        for Cval in (1e15, 1e12, 1e10, 1e8, 1e6, 1e5, 1e3, 1e2, 5e1, 1.25e1,
                     2.5e0, 1e0, 2.5e-1, 5e-1, 1e-2, 1e-3):
            #compare_train_CV_logistic( X_train[:,1:], y_train, X_CV[:,1:], y_CV, i, 'Unscaled full feature set, degree = ' + str(i) + ', C = ' + str(Cval), C=Cval, scale=False )
            #compare_train_CV_logistic( abridge_features(X_train), y_train, abridge_features(X_CV), y_CV, i, 'Unscaled abridged feature set, degree = ' + str(i) + ', C = ' + str(Cval), C=Cval, scale=False )
            compare_train_CV_logistic(a1.X,
                                      y_train,
                                      a2.X,
                                      y_CV,
                                      i,
                                      'Scaled full feature set, degree = ' +