def lasso_(X_train, Y_train, X_test, Y_test, alpha=0.1, regularize=[0.7, 0.7, 0.7]):
    """Fit one Lasso model per subject and report train/test scores.

    For each subject id 1..49 a ``Lasso(alpha=alpha)`` is fitted on
    ``X_train`` against ``Y_train[subject][:, 1:2]`` (only that single
    column slice), then used to predict both ``X_train`` and ``X_test``.
    Per-subject predictions are stacked along a third axis, blended with
    their across-subject mean using ``regularize[1]``, and scored with
    ``scoring.r`` and ``scoring.r2``.

    Args:
        X_train, X_test: feature matrices handed to ``fit`` / ``predict``.
        Y_train, Y_test: containers indexable by subject id whose entries
            are sliced as 2-D arrays.
        alpha: passed to ``Lasso``.
        regularize: blend weights. A one-element sequence is repeated three
            times; only index 1 is read here. The default list is rebound,
            never mutated in place.

    Returns:
        ``(lassos, train_scores, test_scores)``: ``lassos`` maps subject id
        to its fitted model, each ``*_scores`` is the tuple ``(r, r2)``.

    NOTE(review): another definition named ``lasso_`` appears later in this
    file; if both live in the same module scope the later one wins.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    n_subjects = 49
    subject_ids = range(1, n_subjects + 1)

    # Every model is created up front; fitting happens in subject order below.
    lassos = {}
    for sid in subject_ids:
        lassos[sid] = Lasso(alpha=alpha)

    train_pred, train_obs = [], []
    test_pred, test_obs = [], []
    for sid in subject_ids:
        model = lassos[sid]

        target = Y_train[sid][:, 1:2]
        model.fit(X_train, target)
        # predict() output gets a trailing axis so it lines up with the
        # (n, 1) target slice (assumes predict returns 1-D -- TODO confirm).
        fitted = model.predict(X_train)[:, np.newaxis]
        train_obs.append(target)
        train_pred.append(fitted)

        held_out = Y_test[sid][:, 1:2]
        forecast = model.predict(X_test)[:, np.newaxis]
        test_obs.append(held_out)
        test_pred.append(forecast)

    scores = {}
    phases = (
        ('train', train_pred, train_obs),
        ('test', test_pred, test_obs),
    )
    for phase, pred_list, obs_list in phases:
        stacked = np.dstack(pred_list)
        observed = np.ma.dstack(obs_list)
        across_subjects = np.mean(stacked, axis=2, keepdims=True)

        # Shrink each subject's prediction toward the across-subject mean.
        weight = regularize[1]
        predicted_ple = weight * across_subjects + (1 - weight) * stacked

        r_ple = scoring.r(None, predicted_ple, observed)
        r2_ple = scoring.r2(None, None,
                            predicted_ple.mean(axis=2),
                            observed.mean(axis=2))
        print("For subchallenge 1, %s phase, score = %.2f" % (phase, r_ple))
        print("For subchallenge 2, %s phase, score = %.2f" % (phase, r2_ple))
        scores[phase] = (r_ple, r2_ple)

    return lassos, scores['train'], scores['test']
def subject_regularize(rfcs, X_int, X_other, Y, oob=False, regularize=[0.75, 0.3, 0.65]):
    """Blend per-subject predictions with their mean and print both scores.

    For subjects 1..49 the model ``rfcs[1][subject]`` supplies predictions:
    its ``oob_prediction_`` when ``oob`` is truthy, otherwise
    ``predict(X_other)`` with column 0 overwritten by column 0 of
    ``predict(X_int)``. Predictions are stacked along a third axis and mixed
    with the across-subject mean using the three ``regularize`` weights.

    Args:
        rfcs: indexable so that ``rfcs[1][subject]`` is a fitted model.
        X_int, X_other: feature matrices used when ``oob`` is falsy.
        Y: mapping with keys ``'subject'`` (per-subject observations) and
            ``'mean_std'`` (target for the second score).
        oob: use stored out-of-bag predictions instead of calling predict.
        regularize: blend weights for the 'int', 'ple' and 'dec' scores, in
            that order; a one-element sequence is repeated three times.

    Returns:
        9-tuple ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
        r_int_std, r_ple_std, r_dec_std)``.

    NOTE(review): this variant passes ``'std'`` as the moment label to
    ``scoring.r2`` while another ``subject_regularize`` definition in this
    file passes ``'sigma'``; confirm which label ``scoring.r2`` expects.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    per_subject_obs = []
    per_subject_pred = []
    for sid in range(1, 50):
        subject_obs = Y['subject'][sid]
        model = rfcs[1][sid]
        if not oob:
            subject_pred = model.predict(X_other)
            # Column 0 is taken from the X_int predictions instead.
            subject_pred[:, 0] = model.predict(X_int)[:, 0]
        else:
            subject_pred = model.oob_prediction_
        per_subject_obs.append(subject_obs)
        per_subject_pred.append(subject_pred)

    stacked = np.dstack(per_subject_pred)
    observed = np.ma.dstack(per_subject_obs)
    predicted_mean = np.mean(stacked, axis=2, keepdims=True)
    predicted_std = np.std(stacked, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()

    def shrink(weight):
        # Weighted mix of the across-subject mean and the raw predictions.
        return weight * predicted_mean + (1 - weight) * stacked

    kinds = ('int', 'ple', 'dec')
    blended = [shrink(regularize[i]) for i in range(3)]
    overall = shrink(regularize[0])

    r_int, r_ple, r_dec = [scoring.r(kind, pred, observed)
                           for kind, pred in zip(kinds, blended)]
    # Result is not used below; the call is kept as in the original.
    score1_ = scoring.score(overall, observed, n_subjects=49)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)"
          % (1, score1, r_int, r_ple, r_dec))

    mean_std_target = Y['mean_std']
    score2 = scoring.score2(predicted_mean_std, mean_std_target)
    # Order: all 'mean' correlations first, then all 'std' correlations.
    moment_rs = tuple([scoring.r2(kind, moment, predicted_mean_std, mean_std_target)
                       for moment in ('mean', 'std')
                       for kind in kinds])
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
          % ((2, score2) + moment_rs))

    return (r_int, r_ple, r_dec) + moment_rs
def lasso_(X_train,Y_train,X_test,Y_test,alpha=0.1,regularize=[0.7,0.7,0.7]):
    """Train a separate Lasso per subject and score both data phases.

    Subjects are numbered 1..49. Each subject's model is fitted on
    ``X_train`` against the ``[:, 1:2]`` column slice of
    ``Y_train[subject]`` and then predicts ``X_train`` and ``X_test``.
    For each phase the per-subject predictions are depth-stacked, mixed with
    their across-subject mean using weight ``regularize[1]``, and passed to
    ``scoring.r`` / ``scoring.r2``; both scores are printed.

    Args:
        X_train, X_test: feature matrices for ``fit`` / ``predict``.
        Y_train, Y_test: per-subject targets, indexable by subject id.
        alpha: ``Lasso`` regularization parameter.
        regularize: blend weights; a length-1 sequence is tripled. Only the
            element at index 1 is used.

    Returns:
        ``(lassos, scores['train'], scores['test'])`` where ``lassos`` is a
        dict of fitted models keyed by subject id and each score entry is
        ``(r_ple, r2_ple)``.

    NOTE(review): an earlier definition named ``lasso_`` also appears in
    this file; confirm which one is meant to be live.
    """
    if len(regularize)==1:
        regularize = regularize*3

    n_subjects = 49
    ids = list(range(1,n_subjects+1))
    lassos = dict((sid, Lasso(alpha=alpha)) for sid in ids)

    gathered = {
        'train': {'pred': [], 'obs': []},
        'test': {'pred': [], 'obs': []},
    }
    for sid in ids:
        estimator = lassos[sid]

        y_fit = Y_train[sid][:,1:2]
        estimator.fit(X_train,y_fit)
        # Trailing axis added so predictions are column-shaped like y_fit
        # (assumes predict returns a 1-D array -- TODO confirm).
        in_sample = estimator.predict(X_train)[:,np.newaxis]
        gathered['train']['obs'].append(y_fit)
        gathered['train']['pred'].append(in_sample)

        y_eval = Y_test[sid][:,1:2]
        out_sample = estimator.predict(X_test)[:,np.newaxis]
        gathered['test']['obs'].append(y_eval)
        gathered['test']['pred'].append(out_sample)

    scores = {}
    for phase in ('train','test'):
        cube = np.dstack(gathered[phase]['pred'])
        observed = np.ma.dstack(gathered[phase]['obs'])
        subject_mean = np.mean(cube,axis=2,keepdims=True)

        w = regularize[1]
        predicted_ple = w*(subject_mean) + (1-w)*cube

        r_ple = scoring.r(None,predicted_ple,observed)
        collapsed_pred = predicted_ple.mean(axis=2)
        collapsed_obs = observed.mean(axis=2)
        r2_ple = scoring.r2(None,None,collapsed_pred,collapsed_obs)

        print("For subchallenge 1, %s phase, score = %.2f" % (phase,r_ple))
        print("For subchallenge 2, %s phase, score = %.2f" % (phase,r2_ple))
        scores[phase] = (r_ple,r2_ple)

    return lassos,scores['train'],scores['test']
def rfc_(X_train, Y_train, X_test_int, X_test_other, Y_test,
         max_features=1500, n_estimators=1000, max_depth=None,
         min_samples_leaf=1):
    """Fit a single random forest and print train/test scores.

    The forest is built with ``oob_score=True`` and ``random_state=0``.
    Training-phase scores use the forest's ``oob_prediction_``. Test-phase
    predictions come from ``predict(X_test_other)`` with columns 0 and 21
    overwritten by the same columns of ``predict(X_test_int)``.

    Args:
        X_train, Y_train: data the forest is fitted on.
        X_test_int: features used for test columns 0 and 21.
        X_test_other: features used for every other test column.
        Y_test: observations the test predictions are scored against.
        max_features, n_estimators, max_depth, min_samples_leaf: forwarded
            to ``RandomForestRegressor``; ``max_features`` is also printed.

    Returns:
        ``(rfc, train_scores, test_scores)``; each ``*_scores`` is
        ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig, r_dec_sig)``.

    NOTE(review): another definition named ``rfc_`` appears later in this
    file; if both share a module scope the later one wins.
    """
    print(max_features)

    rfc = RandomForestRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                n_jobs=-1,
                                oob_score=True,
                                random_state=0)
    rfc.fit(X_train, Y_train)

    def evaluate(phase, predicted, observed):
        # Score one phase, print the summary line and return the 7-tuple.
        score = scoring.score2(predicted, observed)
        correlations = tuple([scoring.r2(kind, moment, predicted, observed)
                              for moment in ('mean', 'sigma')
                              for kind in ('int', 'ple', 'dec')])
        summary = (score,) + correlations
        print("For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
              % ((phase,) + summary))
        return summary

    # Training phase: out-of-bag predictions stored on the fitted forest.
    train_scores = evaluate('train', rfc.oob_prediction_, Y_train)

    # Test phase: columns 0 and 21 come from the X_test_int predictions.
    test_predicted = rfc.predict(X_test_other)
    test_predicted_int = rfc.predict(X_test_int)
    for col in (0, 21):
        test_predicted[:, col] = test_predicted_int[:, col]
    test_scores = evaluate('test', test_predicted, Y_test)

    return rfc, train_scores, test_scores
def subject_regularize(rfcs,X_int,X_other,Y,oob=False,regularize=[0.75,0.3,0.65]):
    """Score mean-regularized per-subject predictions for both subchallenges.

    Predictions for subjects 1..49 are taken from ``rfcs[1][subject]``:
    ``oob_prediction_`` when ``oob`` is truthy, else ``predict(X_other)``
    whose column 0 is replaced with column 0 of ``predict(X_int)``. They are
    stacked along axis 2; the across-subject mean and standard deviation
    are computed, and each raw prediction is mixed with the mean using the
    weights in ``regularize``.

    Args:
        rfcs: container where ``rfcs[1][subject]`` is a fitted model.
        X_int, X_other: feature matrices used when ``oob`` is falsy.
        Y: mapping providing ``Y['subject'][subject]`` and ``Y['mean_std']``.
        oob: whether to read stored out-of-bag predictions.
        regularize: weights for 'int', 'ple', 'dec' (in that order); a
            length-1 sequence is repeated three times.

    Returns:
        ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
        r_int_sigma, r_ple_sigma, r_dec_sigma)``.

    NOTE(review): an earlier ``subject_regularize`` definition in this file
    passes ``'std'`` rather than ``'sigma'`` to ``scoring.r2``; confirm
    which label is correct.
    """
    if len(regularize)==1:
        regularize = regularize*3

    obs_stack = []
    pred_stack = []
    for subject_id in range(1,50):
        subject_observed = Y['subject'][subject_id]
        forest = rfcs[1][subject_id]
        if oob:
            subject_predicted = forest.oob_prediction_
        else:
            subject_predicted = forest.predict(X_other)
            from_int = forest.predict(X_int)
            # Only column 0 is sourced from the X_int predictions.
            subject_predicted[:,0] = from_int[:,0]
        obs_stack.append(subject_observed)
        pred_stack.append(subject_predicted)

    raw = np.dstack(pred_stack)
    observed = np.ma.dstack(obs_stack)
    predicted_mean = np.mean(raw,axis=2,keepdims=True)
    predicted_std = np.std(raw,axis=2,keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean,predicted_std)).squeeze()

    w_int = regularize[0]
    w_ple = regularize[1]
    w_dec = regularize[2]
    mixed = {
        'int': w_int*(predicted_mean) + (1-w_int)*raw,
        'ple': w_ple*(predicted_mean) + (1-w_ple)*raw,
        'dec': w_dec*(predicted_mean) + (1-w_dec)*raw,
    }
    # Same blend as 'int', recomputed as in the original for scoring.score.
    mixed_overall = w_int*(predicted_mean) + (1-w_int)*raw

    r_int = scoring.r('int',mixed['int'],observed)
    r_ple = scoring.r('ple',mixed['ple'],observed)
    r_dec = scoring.r('dec',mixed['dec'],observed)
    # Value is unused afterwards; the call itself is preserved.
    score1_ = scoring.score(mixed_overall,observed,n_subjects=49)
    score1 = scoring.rs2score(r_int,r_ple,r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)" % (1,score1,r_int,r_ple,r_dec))

    target = Y['mean_std']
    score2 = scoring.score2(predicted_mean_std,target)
    by_moment = {}
    for moment in ('mean','sigma'):
        by_moment[moment] = tuple([scoring.r2(kind,moment,predicted_mean_std,target)
                                   for kind in ('int','ple','dec')])
    r_int_mean,r_ple_mean,r_dec_mean = by_moment['mean']
    r_int_sigma,r_ple_sigma,r_dec_sigma = by_moment['sigma']
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)" %
          (2,score2,r_int_mean,r_ple_mean,r_dec_mean,r_int_sigma,r_ple_sigma,r_dec_sigma))

    return (r_int,r_ple,r_dec,r_int_mean,r_ple_mean,r_dec_mean,r_int_sigma,r_ple_sigma,r_dec_sigma)
def rfc_(X_train,Y_train,X_test_int,X_test_other,Y_test,
         max_features=1500,n_estimators=1000,max_depth=None,min_samples_leaf=1):
    """Fit one random forest, then score its train and test predictions.

    The training phase is scored on the forest's ``oob_prediction_``. The
    test phase is scored on ``predict(X_test_other)`` after columns 0 and 21
    have been replaced with the corresponding columns of
    ``predict(X_test_int)``. A summary line is printed for each phase.

    Args:
        X_train, Y_train: fitting data.
        X_test_int: features supplying test columns 0 and 21.
        X_test_other: features supplying the remaining test columns.
        Y_test: observations for the test phase.
        max_features, n_estimators, max_depth, min_samples_leaf: forwarded
            to ``RandomForestRegressor`` (``max_features`` is printed first).

    Returns:
        ``(rfc, scores['train'], scores['test'])`` where each score entry is
        ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig, r_dec_sig)``.

    NOTE(review): an earlier definition named ``rfc_`` also appears in this
    file; confirm which one is meant to be live.
    """
    print(max_features)

    forest_options = dict(max_features=max_features,
                          n_estimators=n_estimators,
                          max_depth=max_depth,
                          min_samples_leaf=min_samples_leaf,
                          n_jobs=-1,
                          oob_score=True,
                          random_state=0)
    rfc = RandomForestRegressor(**forest_options)
    rfc.fit(X_train,Y_train)

    def train_predictions():
        # Out-of-bag predictions stored on the fitted forest.
        return rfc.oob_prediction_

    def test_predictions():
        # Columns 0 and 21 are sourced from the X_test_int predictions.
        combined = rfc.predict(X_test_other)
        int_based = rfc.predict(X_test_int)
        combined[:,0] = int_based[:,0]
        combined[:,21] = int_based[:,21]
        return combined

    scores = {}
    plan = [('train',train_predictions,Y_train),('test',test_predictions,Y_test)]
    for phase,make_predictions,observed in plan:
        predicted = make_predictions()
        score = scoring.score2(predicted,observed)
        r_means = [scoring.r2(kind,'mean',predicted,observed) for kind in ('int','ple','dec')]
        r_sigmas = [scoring.r2(kind,'sigma',predicted,observed) for kind in ('int','ple','dec')]
        result = tuple([score] + r_means + r_sigmas)
        print("For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
              % ((phase,) + result))
        scores[phase] = result

    return rfc,scores['train'],scores['test']
def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,trans_weight,
              trans_params,X_test_int=None,X_test_other=None,Y_test=None,
              n_estimators=100,seed=0,quiet=False):
    """Fit one tree ensemble per output column, predict, and score.

    For each of 42 columns a model is built (``ExtraTreesRegressor`` when
    ``et[col]`` is truthy, otherwise ``RandomForestRegressor`` with
    ``oob_score=False``) and fitted on ``X`` against that column of
    ``Y_mask`` or ``Y_imp`` according to ``use_mask[col]``. Columns 0 and 21
    are predicted from ``X_test_int``, all others from ``X_test_other``.
    Afterwards, for ``col`` in 0..20, column ``col + 21`` is replaced by a
    weighted mix of itself and a power-law transform of column ``col``
    (presumably the first 21 columns are means and the last 21 the matching
    sigmas -- verify against the caller).

    Args:
        X: training features.
        Y_imp, Y_mask: alternative 2-D target arrays, indexed by column.
        max_features, min_samples_leaf, max_depth, et, use_mask: per-column
            settings, each indexed by column number 0..41.
        trans_weight: per-column mix weight, indexed 0..20.
        trans_params: per-column ``(k0, k1)`` pairs, indexed 0..20.
        X_test_int, X_test_other: prediction features; default to ``X``.
        Y_test: observations to score against; defaults to ``Y_mask``.
        n_estimators, seed: forwarded to every estimator.
        quiet: suppress the printed report when truthy.

    Returns:
        ``(rfcs, score, rs)``: ``rfcs`` maps column to fitted model,
        ``score`` comes from ``scoring.score2`` and ``rs[kind][moment]``
        holds the ``scoring.r2`` values.

    NOTE(review): further definitions named ``rfc_final`` with a different
    signature appear later in this file; if they share a module scope the
    last one wins.
    """
    X_test_int = X if X_test_int is None else X_test_int
    X_test_other = X if X_test_other is None else X_test_other
    Y_test = Y_mask if Y_test is None else Y_test

    n_columns = 42

    def build(col):
        # Construct the (unfitted) estimator configured for this column.
        settings = dict(n_estimators=n_estimators,
                        max_features=max_features[col],
                        min_samples_leaf=min_samples_leaf[col],
                        max_depth=max_depth[col],
                        n_jobs=-1,
                        random_state=seed)
        if et[col]:
            return ExtraTreesRegressor(**settings)
        return RandomForestRegressor(oob_score=False, **settings)

    rfcs = {}
    for col in range(n_columns):
        prog(col,n_columns)
        model = build(col)
        rfcs[col] = model
        targets = Y_mask if use_mask[col] else Y_imp
        model.fit(X,targets[:,col])

    def direct_prediction(col):
        # Columns 0 and 21 use X_test_int; every other column X_test_other.
        features = X_test_int if col in (0,21) else X_test_other
        return rfcs[col].predict(features)

    predicted = np.zeros((X_test_int.shape[0],n_columns))
    for col in range(n_columns):
        if et[col] or not np.array_equal(X,X_test_int):
            # Possibly check in-sample fit because there isn't any alternative.
            predicted[:,col] = direct_prediction(col)
            continue
        try:
            predicted[:,col] = rfcs[col].oob_prediction_
        except AttributeError:
            # No stored out-of-bag predictions on this model; predict directly.
            predicted[:,col] = direct_prediction(col)

    def f_transform(x, k0, k1):
        scaled = x/100
        return 100*(k0*scaled**(k1*0.5) - k0*scaled**(k1*2))

    for col in range(21):
        weight = trans_weight[col]
        k0,k1 = trans_params[col]
        first_half = predicted[:,col]
        second_half = predicted[:,col+21]
        predicted[:,col+21] = weight*f_transform(first_half,k0,k1) + (1-weight)*second_half

    observed = Y_test
    score = scoring.score2(predicted,observed)

    kinds = ('int','ple','dec')
    moments = ('mean','sigma')
    rs = {kind: {moment: scoring.r2(kind,moment,predicted,observed)
                 for moment in moments}
          for kind in kinds}

    if not quiet:
        print("For subchallenge 2:")
        print("\tScore = %.2f" % score)
        for kind in kinds:
            for moment in moments:
                print("\t%s_%s = %.3f" % (kind,moment,rs[kind][moment]))

    return (rfcs,score,rs)
def rfc_cv(X,Y_imp,Y_mask,Y_test=None,n_splits=10,n_estimators=100,
           max_features=1500,min_samples_leaf=1,max_depth=None,rfc=True):
    """Cross-validate tree ensembles over shuffled splits and summarize.

    Two identically configured estimators are created
    (``RandomForestRegressor`` when ``rfc`` is truthy, otherwise
    ``ExtraTreesRegressor``): one is fitted on ``Y_imp`` and, when
    ``Y_mask`` is given, the other on ``Y_mask``. For every split the
    'mean' correlations are computed from the ``Y_imp`` model's predictions
    and the 'sigma' correlations from the ``Y_mask`` model's predictions
    (these are the same predictions when ``Y_mask`` is None). An extra
    'trans' value compares ``f_int(predicted_imp[:, 0])`` with
    ``observed[:, 21]``.

    Args:
        X: feature matrix, indexed by the split's row indices.
        Y_imp: targets for the first estimator.
        Y_mask: targets for the second estimator, or None to reuse ``Y_imp``.
        Y_test: observations to score against; defaults to ``Y_mask``
            (after the None substitution).
        n_splits: number of shuffled splits; also the divisor under the
            square root when computing the ``'sem'`` values.
        n_estimators, max_features, min_samples_leaf, max_depth: estimator
            settings.
        rfc: choose random forest (truthy) or extra trees (falsy).

    Returns:
        ``(scores, rs)``: ``scores`` is ``{'mean': ..., 'sem': ...}`` over
        the per-split ``scoring.rs2score2`` values and ``rs[kind][moment]``
        is a dict of the same form for each correlation.

    NOTE(review): ``ShuffleSplit`` is called with the sample count as first
    positional argument and iterated directly, which looks like the legacy
    ``sklearn.cross_validation`` interface -- confirm against the file's
    import.
    """
    use_Y_mask = Y_mask is not None
    if not use_Y_mask:
        Y_mask = Y_imp
    if Y_test is None:
        Y_test = Y_mask

    estimator_cls = RandomForestRegressor if rfc else ExtraTreesRegressor

    def new_estimator():
        # Both estimators share exactly the same configuration.
        return estimator_cls(max_features=max_features,
                             n_estimators=n_estimators,
                             max_depth=max_depth,
                             min_samples_leaf=min_samples_leaf,
                             oob_score=False,n_jobs=-1,random_state=0)

    rfc_imp = new_estimator()
    rfc_mask = new_estimator()

    shuffle_split = ShuffleSplit(len(Y_imp),n_splits,test_size=0.2,random_state=0)

    kinds = ('int','ple','dec')
    rs = {'int':{'mean':[],'sigma':[],'trans':[]},
          'ple':{'mean':[],'sigma':[]},
          'dec':{'mean':[],'sigma':[]}}
    scores = []
    for train_index,test_index in shuffle_split:
        rfc_imp.fit(X[train_index],Y_imp[train_index])
        predicted_imp = rfc_imp.predict(X[test_index])
        if use_Y_mask:
            rfc_mask.fit(X[train_index],Y_mask[train_index])
            predicted_mask = rfc_mask.predict(X[test_index])
        else:
            predicted_mask = predicted_imp
        observed = Y_test[test_index]

        fold_rs = {'int':{},'ple':{},'dec':{}}
        for kind in kinds:
            # 'mean' is scored from the Y_imp model, 'sigma' from the Y_mask model.
            for moment,source in (('mean',predicted_imp),('sigma',predicted_mask)):
                r_value = scoring.r2(kind,moment,source,observed)
                fold_rs[kind][moment] = r_value
                rs[kind][moment].append(r_value)
        scores.append(scoring.rs2score2(fold_rs))
        rs['int']['trans'].append(
            scoring.r2(None,None,f_int(predicted_imp[:,0]),observed[:,21]))

    def summarize(values):
        # Mean and standard error (std / sqrt(n_splits)) of per-split values.
        return {'mean':np.mean(values),'sem':np.std(values)/np.sqrt(n_splits)}

    for kind in kinds:
        per_kind = rs[kind]
        for moment in list(per_kind):
            per_kind[moment] = summarize(per_kind[moment])
    scores = summarize(scores)

    return scores,rs
def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,
              Y_test=None,n_estimators=100,seed=0):
    """Fit one ensemble per (kind, moment) pair and score merged predictions.

    For each kind in 'int', 'ple', 'dec' and each moment in 'mean', 'sigma'
    a model is built (``ExtraTreesRegressor`` when ``et[kind][moment]`` is
    truthy, else ``RandomForestRegressor`` with ``oob_score=True``) and
    fitted on ``X`` against ``Y_mask`` or ``Y_imp`` according to
    ``use_mask[kind][moment]``. Extra-trees models are evaluated in-sample
    with ``predict(X)``; random forests use ``oob_prediction_``. The final
    prediction matrix takes, within each 21-column half, column 0 from the
    'int' model, column 1 from the 'ple' model and columns 2..20 from the
    'dec' model of the matching moment.

    Args:
        X: training features.
        Y_imp, Y_mask: alternative target arrays.
        max_features, min_samples_leaf, max_depth, et, use_mask: nested
            mappings indexed as ``[kind][moment]``.
        Y_test: observations to score against; defaults to ``Y_mask``.
        n_estimators, seed: forwarded to every estimator.

    Returns:
        ``(rfcs, score, rs)``: ``rfcs[kind][moment]`` are the fitted models,
        ``score`` comes from ``scoring.score2`` and ``rs[kind][moment]``
        holds ``scoring.r2`` values plus ``rs['int']['trans']``.

    NOTE(review): ``rs['int']['trans']`` compares ``f_int(predicted[:, 0])``
    with ``observed[:, 0]``, whereas ``rfc_cv`` in this file compares the
    same transform with ``observed[:, 21]``; confirm which is intended.
    NOTE(review): other definitions named ``rfc_final`` appear in this file.
    """
    if Y_test is None:
        Y_test = Y_mask

    kinds = ('int','ple','dec')
    moments = ('mean','sigma')

    def build(kind,moment):
        # Construct the (unfitted) estimator for one kind/moment pair.
        shared = dict(n_estimators=n_estimators,
                      max_features=max_features[kind][moment],
                      min_samples_leaf=min_samples_leaf[kind][moment],
                      max_depth=max_depth[kind][moment],
                      n_jobs=-1,random_state=seed)
        if et[kind][moment]:
            return ExtraTreesRegressor(**shared)
        return RandomForestRegressor(oob_score=True,**shared)

    # Three separate passes, as in the original: build all, fit all, predict all.
    rfcs = {kind: {moment: build(kind,moment) for moment in moments}
            for kind in kinds}

    for kind in kinds:
        for moment in moments:
            targets = Y_mask if use_mask[kind][moment] else Y_imp
            rfcs[kind][moment].fit(X,targets)

    predictions = {}
    for kind in kinds:
        predictions[kind] = {}
        for moment in moments:
            model = rfcs[kind][moment]
            if et[kind][moment]:
                # Check in-sample fit because there isn't any alternative.
                predictions[kind][moment] = model.predict(X)
            else:
                predictions[kind][moment] = model.oob_prediction_

    predicted = predictions['int']['mean'].copy()
    for i,moment in enumerate(moments):
        offset = 21*i
        predicted[:,offset] = predictions['int'][moment][:,offset]
        predicted[:,1+offset] = predictions['ple'][moment][:,1+offset]
        predicted[:,2+offset:21+offset] = predictions['dec'][moment][:,2+offset:21+offset]

    observed = Y_test
    score = scoring.score2(predicted,observed)
    rs = {kind: {moment: scoring.r2(kind,moment,predicted,observed)
                 for moment in moments}
          for kind in kinds}
    rs['int']['trans'] = scoring.r2(None,None,f_int(predicted[:,0]),observed[:,0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind in kinds:
        for moment in moments:
            print("\t%s_%s = %.3f" % (kind,moment,rs[kind][moment]))

    return (rfcs,score,rs)
def rfc_final(X, Y_imp, Y_mask,
              max_features, min_samples_leaf, max_depth, et, use_mask,
              Y_test=None, n_estimators=100, seed=0):
    """Train per-(kind, moment) ensembles, merge their columns, and score.

    Kinds are 'int', 'ple', 'dec'; moments are 'mean', 'sigma'. Each pair
    gets its own estimator: ``ExtraTreesRegressor`` if ``et[kind][moment]``
    is truthy, otherwise ``RandomForestRegressor`` with ``oob_score=True``.
    Models are fitted on ``X`` against ``Y_mask`` when
    ``use_mask[kind][moment]`` is truthy, else against ``Y_imp``.
    Predictions are ``predict(X)`` for extra trees and ``oob_prediction_``
    for random forests. The merged matrix starts as a copy of the
    'int'/'mean' predictions; then for each moment index ``i`` column
    ``21*i`` is taken from 'int', column ``1 + 21*i`` from 'ple' and
    columns ``2 + 21*i`` up to ``21 + 21*i`` (exclusive) from 'dec'.

    Args:
        X: training features.
        Y_imp, Y_mask: alternative target arrays.
        max_features, min_samples_leaf, max_depth, et, use_mask: nested
            mappings read as ``[kind][moment]``.
        Y_test: observations for scoring; defaults to ``Y_mask``.
        n_estimators, seed: passed to every estimator.

    Returns:
        ``(rfcs, score, rs)`` with fitted models in ``rfcs[kind][moment]``,
        the ``scoring.score2`` value, and the ``scoring.r2`` values in
        ``rs[kind][moment]`` plus ``rs['int']['trans']``.

    NOTE(review): the 'trans' entry scores ``f_int(predicted[:, 0])``
    against ``observed[:, 0]`` while ``rfc_cv`` in this file uses
    ``observed[:, 21]``; confirm which column is intended.
    NOTE(review): earlier definitions named ``rfc_final`` appear in this
    file; if they share a module scope this later one replaces them.
    """
    if Y_test is None:
        Y_test = Y_mask

    # Flat list of every (kind, moment) pair, in kind-major order.
    combos = [(kind, moment)
              for kind in ['int', 'ple', 'dec']
              for moment in ['mean', 'sigma']]

    rfcs = {}
    for kind, moment in combos:
        common = {
            'n_estimators': n_estimators,
            'max_features': max_features[kind][moment],
            'min_samples_leaf': min_samples_leaf[kind][moment],
            'max_depth': max_depth[kind][moment],
            'n_jobs': -1,
            'random_state': seed,
        }
        if not et[kind][moment]:
            estimator = RandomForestRegressor(oob_score=True, **common)
        else:
            estimator = ExtraTreesRegressor(**common)
        rfcs.setdefault(kind, {})[moment] = estimator

    for kind, moment in combos:
        if use_mask[kind][moment]:
            rfcs[kind][moment].fit(X, Y_mask)
        else:
            rfcs[kind][moment].fit(X, Y_imp)

    predictions = {}
    for kind, moment in combos:
        fitted = rfcs[kind][moment]
        if et[kind][moment]:
            # Check in-sample fit because there isn't any alternative.
            outcome = fitted.predict(X)
        else:
            outcome = fitted.oob_prediction_
        predictions.setdefault(kind, {})[moment] = outcome

    predicted = predictions['int']['mean'].copy()
    for i, moment in enumerate(['mean', 'sigma']):
        base = 21 * i
        rest = slice(2 + base, 21 + base)
        predicted[:, base] = predictions['int'][moment][:, base]
        predicted[:, 1 + base] = predictions['ple'][moment][:, 1 + base]
        predicted[:, rest] = predictions['dec'][moment][:, rest]

    observed = Y_test
    score = scoring.score2(predicted, observed)

    rs = {}
    for kind, moment in combos:
        rs.setdefault(kind, {})[moment] = scoring.r2(kind, moment, predicted, observed)
    rs['int']['trans'] = scoring.r2(None, None, f_int(predicted[:, 0]), observed[:, 0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind, moment in combos:
        print("\t%s_%s = %.3f" % (kind, moment, rs[kind][moment]))

    return (rfcs, score, rs)