def test_clean_ambiguities(self):
    """Check the 'clean_ambiguities' and 'remove_dualities' methods.

    Runs ``PredLog.clean_ambiguities`` on the example fixtures
    (EG_LEGMAT, EG_SCORES_RAW, EG_Y) and requires the result to equal
    EG_FIXED_Y element for element.
    """
    predictor = pred_vec.PredLog()
    cleaned = predictor.clean_ambiguities(EG_LEGMAT, EG_SCORES_RAW, EG_Y)
    matches_expected = (EG_FIXED_Y == cleaned).all()
    self.assertTrue(matches_expected)
def compare_train_CV_logistic(Xtr, ytr, Xcv, ycv, degree, description,
                              C=1e5, scale=False, filestump='Cf_log_01'):
    """Fit a PredLog model on the training data and report train and CV results.

    The model is built from ``Xtr``/``ytr`` with regularisation ``C`` and
    fitted once.  Predictions are then made for the training features and,
    after swapping ``Xcv`` into the model, for the cross-validation
    features.  Each prediction is handed to ``print_output`` together with
    ``description``, a stage label ('train' or 'CV'), ``filestump``,
    ``degree`` and ``C``.

    ``scale`` is accepted but not used: feature scaling and polynomial
    expansion are disabled in this version, so callers are expected to pass
    already-prepared arrays.
    """
    # Arguments shared by both reporting calls.
    report_tail = (filestump, degree, C)

    model = pred_vec.PredLog(X=Xtr, y=ytr, C=C)
    model.fit()

    # Training-set predictions.
    model.binary_predict(model.X)
    print_output(model, ytr, description, 'train', *report_tail)

    # Re-use the fitted model on the cross-validation features.
    model.X = Xcv
    model.binary_predict(model.X)
    print_output(model, ycv, description, 'CV', *report_tail)
def test_metrics(Xtest, ytest, degree, description, C=1e5, scale=False,
                 filestump='Cf_log_01'):
    """Fit a PredLog model on the supplied set and report its metrics.

    Parameters
    ----------
    Xtest, ytest : feature array and labels used to build, fit and
        evaluate the model.
    degree, description, filestump, C : forwarded to ``print_output``
        (``C`` is also the regularisation value passed to ``PredLog``).
    scale : accepted for signature compatibility; not used here.

    The previous body referenced ``Xtr``/``ytr``, which are not parameters
    of this function, and labelled the output 'train'.  It now uses the
    arguments it is given and labels the output 'test'.
    """
    # NOTE(review): the model is fitted on the same data it is evaluated
    # on, as in the original body -- confirm this is intended for a test set.
    a1 = pred_vec.PredLog(X=Xtest, y=ytest, C=C)
    a1.fit()
    a1.binary_predict(a1.X)
    print_output(a1, ytest, description, 'test', filestump, degree, C)
def test_binary_predict(self):
    """A lightweight check on the 'binary_predict' method.

    Assumes that arithmetic is close enough across systems to generate
    identical predictions.  A more thorough test would require fitting of
    data and is probably unnecessary.

    The model parameters (theta, bias, classes) are injected directly
    instead of being fitted, then three things are verified:
    the indices predicted as class 1 match the expected indices,
    ``y_pred`` is one-dimensional, and ``y_pred`` equals
    ``y_pred_logistic``.
    """
    model = pred_vec.PredLog(C=1e5)
    model.theta = theta
    model.bias = bias
    model.classes = classes
    model.binary_predict(X)

    # Row indices of the samples predicted as positive.
    positive_idx = np.nonzero(model.y_pred == 1)[0]
    indices_match = (positive_idx == y_pred_posindices_expected).all()

    self.assertTrue(indices_match)
    self.assertTrue(model.y_pred.ndim == 1)
    self.assertTrue((model.y_pred == model.y_pred_logistic).all())
def compare_train_CV_logistic(Xtr, ytr, Xcv, ycv, degree, description,
                              C=1e5, scale=False):
    """Fit on polynomial training features and report train and CV metrics.

    Both feature arrays are first passed through ``create_array_poly`` with
    the given ``degree``.  A ``PredLog`` model is built from the expanded
    training features, fitted, and used to predict on them; the result is
    reported via ``print_output`` with ' Training metrics' appended to
    ``description``.  The expanded cross-validation features are then
    swapped into the same model, predicted on, and reported with
    ' Cross-Validation metrics' appended.

    When ``scale`` is true, ``fscale()`` is called on the model before
    fitting and again after the cross-validation features are swapped in.
    """
    train_features = create_array_poly(Xtr, degree)
    cv_features = create_array_poly(Xcv, degree)

    model = pred_vec.PredLog(X=train_features, y=ytr, C=C)
    if scale:
        model.fscale()
    model.fit()

    # Training-set predictions.
    model.binary_predict(model.X)
    print_output(model, ytr, description + ' Training metrics')

    # Cross-validation predictions with the already-fitted model.
    model.X = cv_features
    if scale:
        model.fscale()
    model.binary_predict(model.X)
    print_output(model, ycv, description + ' Cross-Validation metrics')
import numpy as np import os from vec_hsqc import pred_vec curdir = os.path.dirname(os.path.abspath(__file__)) #X = np.zeros( (500,5) ) #y = np.zeros( 500 ) X = np.loadtxt(os.path.join(curdir, '140225_composite_X.npy')) y = np.loadtxt(os.path.join(curdir, '140225_composite_Y.npy')) a1 = pred_vec.PredLog(X=X, y=y) a1.fit() a1.binary_predict(a1.X) print 'Unscaled, full feature set\n\n' print 'scores min =', np.min(a1.scores) print 'scores max =', np.max(a1.scores) a1._standard_measures_binary(y, a1.y_pred, verbose=True) print '\n\nNonregularized cost function =', a1._cost_function_nonreg( y, a1.scores_logistic), '\n\n' print '\n', '-' * 30, '\n\n'
a1.fit() a1.binary_predict(a1.X) print_output(a1, ytr, description, 'train', filestump, degree, C) #Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) #Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) #X_train = np.abs( np.hstack( [ X_train[:,:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) #X_CV = np.abs( np.hstack( [ X_CV[:,:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) #X = create_array_poly( X_test, 2) if __name__ == '__main__': X_test = create_array_poly(X_test, 2) a1 = pred_vec.PredLog(X=X_test) a1.fscale() a1.theta = THETA a1.bias = BIAS a1.binary_predict(a1.X) a1.y_pred = a1.clean_ambiguities(legmat, a1.scores_logistic, a1.y_pred_logistic) print_output(a1, y_test, 'test set', 'test', 'crap01', 2, 1000) a2 = post_proc.DataOut() a2.generate_master_peak_list(a1.y_pred, legmat, csarray_test) np.savetxt(os.path.join(curdir, 'master_array_new.npy'), a2.master_array, fmt="%s") a2.writeall_peak_lists(a2.master_array, curdir, 'test_set_prediction_new_')
X = mat(c_[X[:500000, :]]) y = c_[y[:500000]] legmat = legmat[:500000, :] np.savetxt(os.path.join(curdir, 'first500k_X.npy'), X) np.savetxt(os.path.join(curdir, 'first500k_y.npy'), y) np.savetxt(os.path.join(curdir, 'first500k_legmat.npy'), legmat, fmt="%s") #X = np.loadtxt( 'first500k_X.npy' ) #y = np.loadtxt( 'first500k_y.npy' ) #legmat = np.loadtxt( 'first500k_legmat.npy', dtype = 'str' ) print X.shape, y.shape, legmat.shape #y = np.loadtxt( 'pred_eg_01_Y' ) X = mat(c_[np.hstack([np.reshape(np.ones(X.shape[0]), (X.shape[0], 1)), X])]) print X.shape a = pred_vec.PredLog(X, y) a.fscale() a.train_classifier() a.make_prediction() np.savetxt(os.path.join(curdir, 'scaled_x_500k'), a.X) np.savetxt(os.path.join(curdir, 'first500k_predicted_Y'), a.pred_Y) np.savetxt(os.path.join(curdir, 'first500k_prediction'), a.pred)
a1.binary_predict( a1.X ) print_output( a1, ycv, description, 'CV', filestump, degree, C ) #Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) #Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) X_train = np.abs( np.hstack( [ X_train[:,:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) X_CV = np.abs( np.hstack( [ X_CV[:,:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) if __name__ == '__main__': for i in range(1,3): Xtr_abr = create_array_poly( Xtr_abr, i ) Xcv_abr = create_array_poly( Xcv_abr, i ) a1 = pred_vec.PredLog( X=Xtr_abr ) a1.fscale() a2 = pred_vec.PredLog( X=Xcv_abr ) a2.fscale() for Cval in ( 1e15, 1e12, 1e10, 1e8, 1e6, 1e5, 1e3, 1e2, 5e1, 1.25e1, 2.5e0, 1e0, 2.5e-1, 5e-1, 1e-2, 1e-3): compare_train_CV_logistic( a1.X, y_train, a2.X, y_CV, i, 'Scaled abridged feature set, degree = ' + str(i) + ', C = ' + str(Cval), C=Cval, scale=True, filestump = 'absval_log_new_01' ) for i in range(1,3): X_train = create_array_poly( X_train, i ) X_CV = create_array_poly( X_CV, i ) #Xtr_abr = create_array_poly( Xtr_abr, degree ) #Xcv_abr = create_array_poly( Xcv_abr, degree )
import numpy as np
import os

from vec_hsqc import pred_vec

# Data files are looked up next to this script.
curdir = os.path.dirname(os.path.abspath(__file__))

# Despite the '.npy' suffix the files are read as text via np.loadtxt.
X = np.loadtxt(os.path.join(curdir, '140225_composite_X.npy'))
# The label filename was missing from this call, so the path resolved to the
# directory itself and could not be loaded.  The labels paired with
# '140225_composite_X.npy' are taken to be '140225_composite_Y.npy'.
# NOTE(review): filename inferred from the matching feature file -- confirm.
y = np.loadtxt(os.path.join(curdir, '140225_composite_Y.npy'))

# Fit with C=1e5, predict on the training features and compute the
# standard binary measures against the model's stored labels.
a1 = pred_vec.PredLog(X, y=y, C=1e5)
a1.fit()
a1.binary_predict(a1.X)
a1._standard_measures_binary(a1.y, a1.y_pred)
import numpy as np import os from vec_hsqc import pred_vec curdir = os.path.dirname(os.path.abspath(__file__)) #X = np.zeros( (500,5) ) #y = np.zeros( 500 ) X = np.loadtxt(os.path.join(curdir, 'query_eg_01_X')) y = np.loadtxt(os.path.join(curdir, 'query_eg_01_Y')) a1 = pred_vec.PredLog() #a1.fit() a1.theta = np.array( [[-4.50881918e+01, -2.94574614e-02, -1.67172965e-02, 4.22452074e-05]]) a1.bias = np.array([1.56534954]) a1.classes = np.array([0., 1.]) a1.binary_predict(X) print 'scores min =', np.min(a1.scores) print 'scores max =', np.max(a1.scores) #np.savetxt( os.path.join( curdir, 'scores.npy' ), a1.scores )
#Xtr_abr = np.abs( np.hstack( [ X_train[:,1:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) #Xcv_abr = np.abs( np.hstack( [ X_CV[:,1:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) #X_train = np.abs( np.hstack( [ X_train[:,:-2], X_train[:,-1].reshape( X_train.shape[0], 1) ] ) ) #X_CV = np.abs( np.hstack( [ X_CV[:,:-2], X_CV[:,-1].reshape( X_CV.shape[0], 1) ] ) ) if __name__ == '__main__': for i in range(1, 3): X_train = create_array_poly(X_train, i) X_CV = create_array_poly(X_CV, i) #Xtr_abr = create_array_poly( Xtr_abr, degree ) #Xcv_abr = create_array_poly( Xcv_abr, degree ) a1 = pred_vec.PredLog(X=X_train) a1.fscale() a2 = pred_vec.PredLog(X=X_CV) a2.fscale() for Cval in (1e15, 1e12, 1e10, 1e8, 1e6, 1e5, 1e3, 1e2, 5e1, 1.25e1, 2.5e0, 1e0, 2.5e-1, 5e-1, 1e-2, 1e-3): #compare_train_CV_logistic( X_train[:,1:], y_train, X_CV[:,1:], y_CV, i, 'Unscaled full feature set, degree = ' + str(i) + ', C = ' + str(Cval), C=Cval, scale=False ) #compare_train_CV_logistic( abridge_features(X_train), y_train, abridge_features(X_CV), y_CV, i, 'Unscaled abridged feature set, degree = ' + str(i) + ', C = ' + str(Cval), C=Cval, scale=False ) compare_train_CV_logistic(a1.X, y_train, a2.X, y_CV, i, 'Scaled full feature set, degree = ' +