Python r2示例，opc_python.utils.scoring.r2 Python示例

示例#1

0

显示文件

def lasso_(X_train,
           Y_train,
           X_test,
           Y_test,
           alpha=0.1,
           regularize=(0.7, 0.7, 0.7)):
    """Fit one Lasso per subject and score the train and test predictions.

    For each subject number 1..49 a ``Lasso(alpha=alpha)`` is fitted on
    ``X_train`` against column 1 of ``Y_train[subject]`` and then used to
    predict both ``X_train`` and ``X_test``.  The per-subject predictions are
    stacked along a third axis, blended with their across-subject mean using
    the weight ``regularize[1]``, and scored with ``scoring.r`` and
    ``scoring.r2``.  Both scores are printed for each phase.

    Args:
        X_train: Feature matrix used for fitting and for the 'train' phase.
        Y_train: Indexable by subject number (1..49); each entry is a 2-D
            array whose column 1 is the regression target.
        X_test: Feature matrix used for the 'test' phase.
        Y_test: Same layout as ``Y_train``, for the 'test' phase.
        alpha: Passed straight to ``Lasso``.
        regularize: Sequence of one or three blend weights.  A one-element
            sequence is repeated three times; only index 1 is used here.
            The default is an immutable tuple so that it cannot be altered
            between calls.

    Returns:
        ``(lassos, train_scores, test_scores)`` where ``lassos`` maps subject
        number to its fitted model and each ``*_scores`` is the tuple
        ``(r_ple, r2_ple)`` for that phase.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    n_subjects = 49
    subjects = range(1, n_subjects + 1)
    lassos = {subject: Lasso(alpha=alpha) for subject in subjects}

    predicted_train, observed_train = [], []
    predicted_test, observed_test = [], []
    for subject in subjects:
        lasso = lassos[subject]

        # Slicing with 1:2 (not 1) keeps the target two-dimensional so the
        # per-subject arrays can be stacked along a third axis below.
        observed = Y_train[subject][:, 1:2]
        lasso.fit(X_train, observed)
        observed_train.append(observed)
        # assumes predict() returns a 1-D array here — TODO confirm
        predicted_train.append(lasso.predict(X_train)[:, np.newaxis])

        observed_test.append(Y_test[subject][:, 1:2])
        predicted_test.append(lasso.predict(X_test)[:, np.newaxis])

    weight = regularize[1]
    scores = {}
    for phase, predicted_, observed_ in [
        ('train', predicted_train, observed_train),
        ('test', predicted_test, observed_test),
    ]:
        predicted = np.dstack(predicted_)
        observed = np.ma.dstack(observed_)
        predicted_mean = np.mean(predicted, axis=2, keepdims=True)
        # Blend each subject's prediction with the across-subject mean.
        predicted_ple = weight * predicted_mean + (1 - weight) * predicted
        r_ple = scoring.r(None, predicted_ple, observed)
        r2_ple = scoring.r2(None, None, predicted_ple.mean(axis=2),
                            observed.mean(axis=2))
        print("For subchallenge 1, %s phase, score = %.2f" % (phase, r_ple))
        print("For subchallenge 2, %s phase, score = %.2f" % (phase, r2_ple))
        scores[phase] = (r_ple, r2_ple)
    return lassos, scores['train'], scores['test']

示例#2

0

显示文件

def subject_regularize(rfcs,
                       X_int,
                       X_other,
                       Y,
                       oob=False,
                       regularize=(0.75, 0.3, 0.65)):
    """Blend per-subject predictions with their mean and score the result.

    For each subject number 1..49 the regressor ``rfcs[1][subject]`` supplies
    a prediction.  The predictions are stacked along a third axis and blended
    with their across-subject mean, using one weight each for the 'int',
    'ple' and 'dec' scores.  The across-subject mean and standard deviation
    are additionally scored against ``Y['mean_std']``.  Both score summaries
    are printed.

    Args:
        rfcs: Indexable container; ``rfcs[1][subject]`` is the regressor used
            for ``subject``.
        X_int: Features used for column 0 of the prediction when ``oob`` is
            false.
        X_other: Features used for all other columns when ``oob`` is false.
        Y: Mapping with keys ``'subject'`` (indexable by subject number) and
            ``'mean_std'``.
        oob: If true, use each regressor's ``oob_prediction_`` and ignore
            ``X_int`` / ``X_other``.
        regularize: Sequence of one or three blend weights, in the order
            int, ple, dec.  A one-element sequence is repeated three times.
            The default is an immutable tuple so that it cannot be altered
            between calls.

    Returns:
        The 9-tuple ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean,
        r_dec_mean, r_int_std, r_ple_std, r_dec_std)``.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    n_subjects = 49
    observed_ = []
    predicted_ = []
    for subject in range(1, n_subjects + 1):
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            # Column 0 is taken from the model evaluated on X_int instead.
            predicted[:, 0] = rfc.predict(X_int)[:, 0]
        observed_.append(Y['subject'][subject])
        predicted_.append(predicted)

    predicted = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(predicted, axis=2, keepdims=True)
    predicted_std = np.std(predicted, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()

    def blend(weight):
        # Weighted mix of the across-subject mean and the raw predictions.
        return weight * predicted_mean + (1 - weight) * predicted

    predicted_int = blend(regularize[0])
    predicted_ple = blend(regularize[1])
    predicted_dec = blend(regularize[2])

    r_int = scoring.r('int', predicted_int, observed)
    r_ple = scoring.r('ple', predicted_ple, observed)
    r_dec = scoring.r('dec', predicted_dec, observed)
    # The value returned by scoring.score is not used; the call is kept
    # because its side effects (if any) are not visible from here.  It gets
    # the regularize[0] blend, which is exactly predicted_int.
    scoring.score(predicted_int, observed, n_subjects=n_subjects)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)"
          % (1, score1, r_int, r_ple, r_dec))

    target = Y['mean_std']
    score2 = scoring.score2(predicted_mean_std, target)
    # NOTE(review): the moment label used below is 'std'; confirm that this
    # is a label scoring.r2 accepts.
    r_int_mean = scoring.r2('int', 'mean', predicted_mean_std, target)
    r_ple_mean = scoring.r2('ple', 'mean', predicted_mean_std, target)
    r_dec_mean = scoring.r2('dec', 'mean', predicted_mean_std, target)
    r_int_std = scoring.r2('int', 'std', predicted_mean_std, target)
    r_ple_std = scoring.r2('ple', 'std', predicted_mean_std, target)
    r_dec_std = scoring.r2('dec', 'std', predicted_mean_std, target)
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
          % (2, score2, r_int_mean, r_ple_mean, r_dec_mean,
             r_int_std, r_ple_std, r_dec_std))
    return (r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean, r_int_std,
            r_ple_std, r_dec_std)

示例#3

0

显示文件

文件： fit1.py 项目： bence-szalai/olfaction-prediction

def lasso_(X_train,Y_train,X_test,Y_test,alpha=0.1,regularize=(0.7,0.7,0.7)):
    """Train a separate Lasso for every subject and report two scores.

    Subjects are numbered 1..49.  Each subject's model is fitted on
    ``X_train`` against column 1 of ``Y_train[subject]`` and evaluated on
    both ``X_train`` and ``X_test``.  Predictions from all subjects are
    stacked on a third axis, mixed with their across-subject mean using the
    weight ``regularize[1]``, and passed to ``scoring.r`` / ``scoring.r2``.
    The two scores are printed for the 'train' and 'test' phases.

    Args:
        X_train: Features for fitting and for the 'train' phase.
        Y_train: Indexable by subject number; each entry is a 2-D array whose
            column 1 is the target.
        X_test: Features for the 'test' phase.
        Y_test: Same layout as ``Y_train``.
        alpha: Forwarded to ``Lasso``.
        regularize: One or three mixing weights; a single weight is repeated
            three times.  Only index 1 is read.  The default is a tuple, not
            a list, so no mutable object is shared between calls.

    Returns:
        ``(lassos, scores_train, scores_test)``; ``lassos`` maps subject
        number to fitted model and each scores entry is ``(r_ple, r2_ple)``.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    n_subjects = 49
    lassos = {}
    # phase name -> (list of predictions, list of observations)
    collected = {'train': ([], []), 'test': ([], [])}
    for subject in range(1, n_subjects + 1):
        lassos[subject] = Lasso(alpha=alpha)
    for subject in range(1, n_subjects + 1):
        model = lassos[subject]
        target = Y_train[subject][:, 1:2]  # 1:2 keeps the column 2-D
        model.fit(X_train, target)
        for phase, features, truth in (('train', X_train, target),
                                       ('test', X_test, None)):
            if truth is None:
                truth = Y_test[subject][:, 1:2]
            # assumes predict() yields a 1-D array here — TODO confirm
            estimate = model.predict(features)[:, np.newaxis]
            collected[phase][0].append(estimate)
            collected[phase][1].append(truth)

    scores = {}
    for phase in ('train', 'test'):
        predicted_, observed_ = collected[phase]
        predicted = np.dstack(predicted_)
        observed = np.ma.dstack(observed_)
        predicted_mean = np.mean(predicted, axis=2, keepdims=True)
        # Mix each subject's prediction with the across-subject mean.
        predicted_ple = regularize[1]*predicted_mean + (1-regularize[1])*predicted
        r_ple = scoring.r(None, predicted_ple, observed)
        r2_ple = scoring.r2(None, None, predicted_ple.mean(axis=2),
                            observed.mean(axis=2))
        print("For subchallenge 1, %s phase, score = %.2f" % (phase, r_ple))
        print("For subchallenge 2, %s phase, score = %.2f" % (phase, r2_ple))
        scores[phase] = (r_ple, r2_ple)
    return lassos, scores['train'], scores['test']

示例#4

0

显示文件

文件： fit2.py 项目： jeriscience/olfaction-prediction

def rfc_(X_train,
         Y_train,
         X_test_int,
         X_test_other,
         Y_test,
         max_features=1500,
         n_estimators=1000,
         max_depth=None,
         min_samples_leaf=1):
    """Fit a single random forest and score it on train and test data.

    The 'train' phase is scored on the forest's out-of-bag predictions.  The
    'test' phase predicts from ``X_test_other`` and then overwrites columns
    0 and 21 with the prediction made from ``X_test_int``.  ``max_features``
    is printed first, followed by one score line per phase.

    Returns:
        ``(rfc, train_scores, test_scores)`` where each scores entry is the
        7-tuple ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig,
        r_dec_sig)``.
    """
    print(max_features)

    forest = RandomForestRegressor(max_features=max_features,
                                   n_estimators=n_estimators,
                                   max_depth=max_depth,
                                   min_samples_leaf=min_samples_leaf,
                                   n_jobs=-1,
                                   oob_score=True,
                                   random_state=0)
    forest.fit(X_train, Y_train)

    def evaluate(phase, predicted, observed):
        # Overall score first, then the three 'mean' correlations followed
        # by the three 'sigma' correlations.
        results = [scoring.score2(predicted, observed)]
        for moment in ('mean', 'sigma'):
            for kind in ('int', 'ple', 'dec'):
                results.append(scoring.r2(kind, moment, predicted, observed))
        results = tuple(results)
        print("For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
              % ((phase,) + results))
        return results

    train_scores = evaluate('train', forest.oob_prediction_, Y_train)

    test_predicted = forest.predict(X_test_other)
    from_int = forest.predict(X_test_int)
    for column in (0, 21):
        test_predicted[:, column] = from_int[:, column]
    test_scores = evaluate('test', test_predicted, Y_test)

    return forest, train_scores, test_scores

示例#5

0

显示文件

文件： fit1.py 项目： bence-szalai/olfaction-prediction

def subject_regularize(rfcs,X_int,X_other,Y,oob=False,regularize=(0.75,0.3,0.65)):
    """Mix per-subject predictions with their mean and score the outcome.

    ``rfcs[1][subject]`` provides the prediction for each subject number
    1..49.  The stacked predictions are mixed with their across-subject mean
    using separate weights for the 'int', 'ple' and 'dec' scores.  The
    across-subject mean and standard deviation are also scored against
    ``Y['mean_std']``.  Two summary lines are printed.

    Args:
        rfcs: Indexable container; ``rfcs[1][subject]`` is a regressor.
        X_int: Features that supply column 0 of the prediction when ``oob``
            is false.
        X_other: Features that supply every other column when ``oob`` is
            false.
        Y: Mapping with keys ``'subject'`` and ``'mean_std'``.
        oob: When true, each regressor's ``oob_prediction_`` is used and the
            feature matrices are ignored.
        regularize: One or three mixing weights (int, ple, dec); a single
            weight is repeated three times.  The default is a tuple, not a
            list, so no mutable object is shared between calls.

    Returns:
        ``(r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
        r_int_sigma, r_ple_sigma, r_dec_sigma)``.
    """
    if len(regularize) == 1:
        regularize = regularize * 3

    n_subjects = 49
    observed_ = []
    predicted_ = []
    for subject in range(1, n_subjects + 1):
        observed = Y['subject'][subject]
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            predicted_int = rfc.predict(X_int)
            # Column 0 comes from the X_int prediction.
            predicted[:, 0] = predicted_int[:, 0]
        observed_.append(observed)
        predicted_.append(predicted)

    predicted = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(predicted, axis=2, keepdims=True)
    predicted_std = np.std(predicted, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()

    w_int, w_ple, w_dec = regularize[0], regularize[1], regularize[2]
    predicted_int = w_int*predicted_mean + (1 - w_int)*predicted
    predicted_ple = w_ple*predicted_mean + (1 - w_ple)*predicted
    predicted_dec = w_dec*predicted_mean + (1 - w_dec)*predicted

    r_int = scoring.r('int', predicted_int, observed)
    r_ple = scoring.r('ple', predicted_ple, observed)
    r_dec = scoring.r('dec', predicted_dec, observed)
    # scoring.score's return value is unused; the call is retained because
    # any side effects it has are not visible from here.  Its input is the
    # regularize[0] mix, i.e. predicted_int, so that array is reused rather
    # than computed a second time.
    scoring.score(predicted_int, observed, n_subjects=n_subjects)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)" % (1,score1,r_int,r_ple,r_dec))

    score2 = scoring.score2(predicted_mean_std, Y['mean_std'])
    r_int_mean = scoring.r2('int', 'mean', predicted_mean_std, Y['mean_std'])
    r_ple_mean = scoring.r2('ple', 'mean', predicted_mean_std, Y['mean_std'])
    r_dec_mean = scoring.r2('dec', 'mean', predicted_mean_std, Y['mean_std'])
    r_int_sigma = scoring.r2('int', 'sigma', predicted_mean_std, Y['mean_std'])
    r_ple_sigma = scoring.r2('ple', 'sigma', predicted_mean_std, Y['mean_std'])
    r_dec_sigma = scoring.r2('dec', 'sigma', predicted_mean_std, Y['mean_std'])
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)" %
          (2,score2,r_int_mean,r_ple_mean,r_dec_mean,r_int_sigma,r_ple_sigma,r_dec_sigma))
    return (r_int,r_ple,r_dec,r_int_mean,r_ple_mean,r_dec_mean,r_int_sigma,r_ple_sigma,r_dec_sigma)

示例#6

0

显示文件

文件： fit2.py 项目： bence-szalai/olfaction-prediction

def rfc_(X_train,Y_train,X_test_int,X_test_other,Y_test,
         max_features=1500,n_estimators=1000,max_depth=None,min_samples_leaf=1):
    """Train one random forest and print/return its train and test scores.

    Out-of-bag predictions are scored for the 'train' phase.  For the 'test'
    phase the forest predicts from ``X_test_other``, and columns 0 and 21 of
    that prediction are replaced by the corresponding columns predicted from
    ``X_test_int``.

    Returns:
        ``(rfc, train_scores, test_scores)``; each scores entry is
        ``(score, r_int, r_ple, r_dec, r_int_sig, r_ple_sig, r_dec_sig)``.
    """
    print(max_features)

    rfc = RandomForestRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                n_jobs=-1,
                                oob_score=True,
                                random_state=0)
    rfc.fit(X_train, Y_train)

    # Order in which scoring.r2 is queried: all 'mean' terms, then all
    # 'sigma' terms.
    queries = [(kind, moment)
               for moment in ('mean', 'sigma')
               for kind in ('int', 'ple', 'dec')]

    scores = {}
    for phase, observed in (('train', Y_train), ('test', Y_test)):
        if phase != 'train':
            predicted = rfc.predict(X_test_other)
            predicted_int = rfc.predict(X_test_int)
            for col in (0, 21):
                predicted[:, col] = predicted_int[:, col]
        else:
            predicted = rfc.oob_prediction_

        values = [scoring.score2(predicted, observed)]
        for kind, moment in queries:
            values.append(scoring.r2(kind, moment, predicted, observed))
        values = tuple(values)

        print("For subchallenge 2, %s phase, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)"
              % ((phase,) + values))
        scores[phase] = values

    return rfc, scores['train'], scores['test']

示例#7

0

显示文件

文件： fit2.py 项目： bence-szalai/olfaction-prediction

def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,trans_weight,
              trans_params,X_test_int=None,X_test_other=None,Y_test=None,n_estimators=100,seed=0,quiet=False):
    """Fit one tree-ensemble regressor per output column and score the result.

    A separate regressor is built for each of the 42 output columns, using
    the per-column entries of ``max_features``, ``min_samples_leaf``,
    ``max_depth`` and ``et``.  Each is fitted on ``X`` against the matching
    column of ``Y_mask`` (when ``use_mask[col]`` is true) or ``Y_imp``.
    After prediction, columns 21..41 are replaced by a weighted mix of a
    transform of columns 0..20 and their own predicted values.

    Args:
        X: Training features.
        Y_imp, Y_mask: Two versions of the training targets; ``use_mask``
            chooses between them per column.
        max_features, min_samples_leaf, max_depth: Per-column
            hyper-parameters, indexed by column number.
        et: Per-column flag; true selects ``ExtraTreesRegressor``, false
            selects ``RandomForestRegressor``.
        use_mask: Per-column flag selecting ``Y_mask`` over ``Y_imp``.
        trans_weight: Per-column (0..20) mixing weight for the transform.
        trans_params: Per-column (0..20) pair ``(k0, k1)`` for the transform.
        X_test_int: Features used to predict columns 0 and 21; defaults to
            ``X``.
        X_test_other: Features used to predict all other columns; defaults
            to ``X``.
        Y_test: Targets to score against; defaults to ``Y_mask``.
        n_estimators: Number of trees per regressor.
        seed: ``random_state`` passed to every regressor.
        quiet: If true, suppress the printed score summary.

    Returns:
        ``(rfcs, score, rs)`` where ``rfcs`` maps column number to fitted
        regressor, ``score`` is ``scoring.score2`` of the final prediction,
        and ``rs[kind][moment]`` holds ``scoring.r2`` for each kind in
        int/ple/dec and moment in mean/sigma.
    """
    
    # Fall back to the training data when no separate test data is given.
    if X_test_int is None:
        X_test_int = X
    if X_test_other is None:
        X_test_other = X
    if Y_test is None:
        Y_test = Y_mask


    def rfc_maker(n_estimators=n_estimators,max_features=max_features,
                  min_samples_leaf=min_samples_leaf,max_depth=max_depth,et=False):
        # Build an unfitted regressor; `et` picks the estimator class.
        if not et: 
            kls = RandomForestRegressor
            # Out-of-bag scoring is explicitly switched off for the forest.
            kwargs = {'oob_score':False}
        else:
            kls = ExtraTreesRegressor
            kwargs = {}

        return kls(n_estimators=n_estimators, max_features=max_features,
                   min_samples_leaf=min_samples_leaf, max_depth=max_depth,
                   n_jobs=-1, random_state=seed, **kwargs)
        
    # One regressor per output column, fitted as soon as it is created.
    rfcs = {}
    for col in range(42):
        prog(col,42)  # helper defined elsewhere; presumably progress output
        rfcs[col] = rfc_maker(n_estimators=n_estimators,
                                max_features=max_features[col],
                                min_samples_leaf=min_samples_leaf[col],
                                max_depth=max_depth[col],
                                et=et[col])

        if use_mask[col]:
            rfcs[col].fit(X,Y_mask[:,col])
        else:
            rfcs[col].fit(X,Y_imp[:,col])
    
    predicted = np.zeros((X_test_int.shape[0],42))
    for col in range(42):
        # Use predict() when the column uses extra-trees or when the test
        # features differ from the training features.
        if et[col] or not np.array_equal(X,X_test_int):
            # Possibly check in-sample fit because there isn't any alternative.  
            if col in [0,21]:
                predicted[:,col] = rfcs[col].predict(X_test_int)
            else:
                predicted[:,col] = rfcs[col].predict(X_test_other)
        else:
            # Prefer out-of-bag predictions; if the attribute is missing,
            # fall back to predict().
            # NOTE(review): rfc_maker passes oob_score=False, so this
            # fallback presumably always fires — confirm.
            try:
                predicted[:,col] = rfcs[col].oob_prediction_
            except AttributeError:
                if col in [0,21]:
                    predicted[:,col] = rfcs[col].predict(X_test_int)
                else:
                    predicted[:,col] = rfcs[col].predict(X_test_other)

    def f_transform(x, k0, k1):
            # Maps x (rescaled by 1/100) through a difference of two power
            # laws and scales the result back by 100.
            return 100*(k0*(x/100)**(k1*0.5) - k0*(x/100)**(k1*2))

    # Mix each of columns 21..41 with the transform of the column 21
    # positions before it, weighted by trans_weight[col].
    for col in range(21):
        tw = trans_weight[col]
        k0,k1 = trans_params[col]
        p_m = predicted[:,col]
        p_s = predicted[:,col+21]
        predicted[:,col+21] = tw*f_transform(p_m,k0,k1) + (1-tw)*p_s
    
    observed = Y_test
    score = scoring.score2(predicted,observed)
    rs = {}
    for kind in ['int','ple','dec']:
        rs[kind] = {}
        for moment in ['mean','sigma']:
            rs[kind][moment] = scoring.r2(kind,moment,predicted,observed)
    
    if not quiet:
        print("For subchallenge 2:")
        print("\tScore = %.2f" % score)
        for kind in ['int','ple','dec']:
            for moment in ['mean','sigma']: 
                print("\t%s_%s = %.3f" % (kind,moment,rs[kind][moment]))
        
    return (rfcs,score,rs)

示例#8

0

显示文件

文件： fit2.py 项目： bence-szalai/olfaction-prediction

def rfc_cv(X,Y_imp,Y_mask,Y_test=None,n_splits=10,n_estimators=100,
           max_features=1500,min_samples_leaf=1,max_depth=None,rfc=True):
    """Cross-validate a tree-ensemble regressor with shuffled splits.

    Two regressors with identical hyper-parameters are refitted on every
    split: one against ``Y_imp`` and, when ``Y_mask`` is given, one against
    ``Y_mask``.  The '*_mean' correlations are computed from the ``Y_imp``
    model and the '*_sigma' correlations from the ``Y_mask`` model.

    Args:
        X: Feature matrix, indexed by the split's row indices.
        Y_imp: Targets for the first model; its length sets the split size.
        Y_mask: Targets for the second model, or ``None`` to reuse the
            ``Y_imp`` model's predictions for everything.
        Y_test: Targets to score against; defaults to ``Y_mask`` (which is
            itself ``Y_imp`` when ``Y_mask`` was ``None``).
        n_splits: Number of shuffled splits.
        n_estimators, max_features, min_samples_leaf, max_depth: Forwarded
            to the regressors.
        rfc: True selects ``RandomForestRegressor``, false selects
            ``ExtraTreesRegressor``.

    Returns:
        ``(scores, rs)``.  ``scores`` is ``{'mean': ..., 'sem': ...}`` over
        the per-split ``scoring.rs2score2`` values.  ``rs[kind][moment]`` is
        the same kind of dict for each recorded correlation, including
        ``rs['int']['trans']``.
    """
    if Y_mask is None:
        use_Y_mask = False
        Y_mask = Y_imp
    else:
        use_Y_mask = True
    if Y_test is None:
        Y_test = Y_mask
    if rfc:
        rfc_imp = RandomForestRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                oob_score=False,n_jobs=-1,random_state=0)
        rfc_mask = RandomForestRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                oob_score=False,n_jobs=-1,random_state=0)
    else:
        rfc_imp = ExtraTreesRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                  oob_score=False,n_jobs=-1,random_state=0)
        rfc_mask = ExtraTreesRegressor(max_features=max_features,
                                n_estimators=n_estimators,
                                max_depth=max_depth,
                                min_samples_leaf=min_samples_leaf,
                                  oob_score=False,n_jobs=-1,random_state=0)
    test_size = 0.2
    # NOTE(review): passing the sample count first and iterating the object
    # directly matches the legacy sklearn.cross_validation.ShuffleSplit API
    # — confirm which ShuffleSplit this file imports.
    shuffle_split = ShuffleSplit(len(Y_imp),n_splits,test_size=test_size,random_state=0)
    test_size *= len(Y_imp)  # converted to a row count; not read again below
    # Per-split correlations; only 'int' also tracks the 'trans' value.
    rs = {'int':{'mean':[],'sigma':[],'trans':[]},'ple':{'mean':[],'sigma':[]},'dec':{'mean':[],'sigma':[]}}
    scores = []
    for train_index,test_index in shuffle_split:
        rfc_imp.fit(X[train_index],Y_imp[train_index])
        predicted_imp = rfc_imp.predict(X[test_index])
        if use_Y_mask:
            rfc_mask.fit(X[train_index],Y_mask[train_index])
            predicted_mask = rfc_mask.predict(X[test_index])
        else:
            predicted_mask = predicted_imp
        observed = Y_test[test_index]
        rs_ = {'int':{},'ple':{},'dec':{}}
        for kind1 in ['int','ple','dec']:
            for kind2 in ['mean','sigma']:
                if kind2 in rs[kind1]:
                    # Means are scored from the Y_imp model, sigmas from the
                    # Y_mask model.
                    if '%s_%s' % (kind1,kind2) in ['int_mean','ple_mean','dec_mean']:
                        r_ = scoring.r2(kind1,kind2,predicted_imp,observed)
                    else:
                        r_ = scoring.r2(kind1,kind2,predicted_mask,observed)
                    rs_[kind1][kind2] = r_
                    rs[kind1][kind2].append(r_)
        score = scoring.rs2score2(rs_)
        scores.append(score)
        # f_int is defined elsewhere; its output for predicted column 0 is
        # compared with observed column 21.
        rs['int']['trans'].append(scoring.r2(None,None,f_int(predicted_imp[:,0]),observed[:,21]))
    # Collapse each per-split list into its mean and the standard error
    # (std / sqrt(n_splits)).
    for kind1 in ['int','ple','dec']:
        for kind2 in ['mean','sigma','trans']:
            if kind2 in rs[kind1]:
                rs[kind1][kind2] = {'mean':np.mean(rs[kind1][kind2]),'sem':np.std(rs[kind1][kind2])/np.sqrt(n_splits)}
    scores = {'mean':np.mean(scores),'sem':np.std(scores)/np.sqrt(n_splits)}
    #print("For subchallenge 2, using cross-validation with:")
    #print("\tat most %s features:" % max_features)
    #print("\tat least %s samples per leaf:" % min_samples_leaf)
    #print("\tat most %s depth:" % max_depth)
    #print("\tscore = %.2f+/- %.2f" % (scores['mean'],scores['sem']))
    # Reporting loop with its print disabled; it currently does nothing.
    for kind2 in ['mean','sigma','trans']:
        for kind1 in ['int','ple','dec']:
            if kind2 in rs[kind1]:
                pass#print("\t%s_%s = %.3f+/- %.3f" % (kind1,kind2,rs[kind1][kind2]['mean'],rs[kind1][kind2]['sem']))
        
    return scores,rs

示例#9

0

显示文件

文件： fit2.py 项目： jeriscience/olfaction-prediction

def rfc_final(X,Y_imp,Y_mask,
              max_features,min_samples_leaf,max_depth,et,use_mask,
              Y_test=None,n_estimators=100,seed=0):
    """Fit one regressor per (kind, moment) pair and score a merged prediction.

    Six regressors are built, one for each kind in int/ple/dec and moment in
    mean/sigma, with hyper-parameters read from the nested ``[kind][moment]``
    arguments.  Each is fitted on ``X`` against ``Y_mask`` or ``Y_imp`` as
    selected by ``use_mask``.  The final prediction takes column 0 (+21 for
    sigma) from the 'int' models, column 1 (+21) from the 'ple' models and
    columns 2..20 (+21) from the 'dec' models.  A score summary is printed.

    Returns:
        ``(rfcs, score, rs)``: the nested dict of fitted regressors, the
        ``scoring.score2`` value, and ``rs[kind][moment]`` correlations plus
        ``rs['int']['trans']``.
    """
    kinds = ['int','ple','dec']
    moments = ['mean','sigma']

    if Y_test is None:
        Y_test = Y_mask

    def build(kind, moment):
        # Shared settings; the forest variant additionally enables OOB
        # scoring so that oob_prediction_ is available after fitting.
        settings = dict(n_estimators=n_estimators,
                        max_features=max_features[kind][moment],
                        min_samples_leaf=min_samples_leaf[kind][moment],
                        max_depth=max_depth[kind][moment],
                        n_jobs=-1,
                        random_state=seed)
        if et[kind][moment]:
            return ExtraTreesRegressor(**settings)
        return RandomForestRegressor(oob_score=True, **settings)

    # Construct every model before fitting any of them.
    rfcs = {}
    for kind in kinds:
        rfcs[kind] = {moment: build(kind, moment) for moment in moments}

    for kind in kinds:
        for moment in moments:
            target = Y_mask if use_mask[kind][moment] else Y_imp
            rfcs[kind][moment].fit(X, target)

    predictions = {}
    for kind in kinds:
        predictions[kind] = {}
        for moment in moments:
            model = rfcs[kind][moment]
            if et[kind][moment]:
                # In-sample prediction: no out-of-bag alternative is used
                # for the extra-trees models.
                predictions[kind][moment] = model.predict(X)
            else:
                predictions[kind][moment] = model.oob_prediction_

    # Start from the int/mean prediction and overwrite column blocks.
    predicted = predictions['int']['mean'].copy()
    for i, moment in enumerate(moments):
        base = 21*i
        predicted[:, base] = predictions['int'][moment][:, base]
        predicted[:, base+1] = predictions['ple'][moment][:, base+1]
        predicted[:, base+2:base+21] = predictions['dec'][moment][:, base+2:base+21]

    observed = Y_test
    score = scoring.score2(predicted, observed)
    rs = {}
    for kind in kinds:
        rs[kind] = {}
        for moment in moments:
            rs[kind][moment] = scoring.r2(kind, moment, predicted, observed)
    # f_int is defined elsewhere.
    # NOTE(review): the comparison target is observed column 0; confirm that
    # this, rather than column 21, is the intended column.
    rs['int']['trans'] = scoring.r2(None, None, f_int(predicted[:,0]), observed[:,0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind in kinds:
        for moment in moments:
            print("\t%s_%s = %.3f" % (kind, moment, rs[kind][moment]))

    return (rfcs, score, rs)

示例#10

0

显示文件

文件： fit2.py 项目： jeriscience/olfaction-prediction

def rfc_final(X,
              Y_imp,
              Y_mask,
              max_features,
              min_samples_leaf,
              max_depth,
              et,
              use_mask,
              Y_test=None,
              n_estimators=100,
              seed=0):
    """Train six (kind, moment) regressors and score their combined output.

    For every kind in int/ple/dec and moment in mean/sigma a regressor is
    created from the ``[kind][moment]`` entries of the hyper-parameter
    arguments, then fitted on ``X`` against ``Y_mask`` when
    ``use_mask[kind][moment]`` is true and against ``Y_imp`` otherwise.
    Extra-trees models are evaluated with ``predict(X)``; forest models with
    their out-of-bag predictions.  The combined prediction uses the 'int'
    models for columns 0 and 21, the 'ple' models for columns 1 and 22, and
    the 'dec' models for columns 2..20 and 23..41.

    Returns:
        ``(rfcs, score, rs)``: fitted regressors as ``rfcs[kind][moment]``,
        the ``scoring.score2`` result, and the ``scoring.r2`` results as
        ``rs[kind][moment]`` together with ``rs['int']['trans']``.
    """
    if Y_test is None:
        Y_test = Y_mask

    pairs = [(kind, moment)
             for kind in ('int', 'ple', 'dec')
             for moment in ('mean', 'sigma')]

    # Pass 1: create all regressors (nothing is fitted yet).
    rfcs = {}
    for kind, moment in pairs:
        options = {
            'n_estimators': n_estimators,
            'max_features': max_features[kind][moment],
            'min_samples_leaf': min_samples_leaf[kind][moment],
            'max_depth': max_depth[kind][moment],
            'n_jobs': -1,
            'random_state': seed,
        }
        if not et[kind][moment]:
            estimator = RandomForestRegressor(oob_score=True, **options)
        else:
            estimator = ExtraTreesRegressor(**options)
        rfcs.setdefault(kind, {})[moment] = estimator

    # Pass 2: fit each regressor on the selected version of the targets.
    for kind, moment in pairs:
        if use_mask[kind][moment]:
            rfcs[kind][moment].fit(X, Y_mask)
        else:
            rfcs[kind][moment].fit(X, Y_imp)

    # Pass 3: collect one full prediction matrix per regressor.
    predictions = {}
    for kind, moment in pairs:
        estimator = rfcs[kind][moment]
        if et[kind][moment]:
            # In-sample fit; no out-of-bag alternative is used here.
            result = estimator.predict(X)
        else:
            result = estimator.oob_prediction_
        predictions.setdefault(kind, {})[moment] = result

    # Assemble the final matrix column block by column block.
    predicted = predictions['int']['mean'].copy()
    for i, moment in enumerate(['mean', 'sigma']):
        shift = 21 * i
        blocks = (('int', shift),
                  ('ple', shift + 1),
                  ('dec', slice(shift + 2, shift + 21)))
        for kind, columns in blocks:
            predicted[:, columns] = predictions[kind][moment][:, columns]

    observed = Y_test
    score = scoring.score2(predicted, observed)
    rs = {}
    for kind, moment in pairs:
        rs.setdefault(kind, {})[moment] = scoring.r2(kind, moment, predicted,
                                                     observed)
    # f_int is defined elsewhere.
    # NOTE(review): the comparison target is observed column 0; confirm that
    # this, rather than column 21, is the intended column.
    rs['int']['trans'] = scoring.r2(None, None, f_int(predicted[:, 0]),
                                    observed[:, 0])

    print("For subchallenge 2:")
    print("\tScore = %.2f" % score)
    for kind, moment in pairs:
        print("\t%s_%s = %.3f" % (kind, moment, rs[kind][moment]))

    return (rfcs, score, rs)