示例#1
0
def model_pred(tr_x, tr_y, te_x, tr_x17, tr_y17, te_x17, quarter):
    """Fit a distance-weighted KNN regressor and predict three test months.

    The training set is whatever ``data_quarter(tr_x, tr_y, tr_x17, tr_y17,
    quarter, False)`` returns. Its features are min-max scaled and the
    regressor is built with ``int(sqrt(n_train_rows))`` neighbours,
    ``weights='distance'`` and ``p=1``.

    The selected test frame has its ``parcelid`` column renamed to ``date``
    and overwritten with a month code before being scaled:

    * ``quarter == 3``: ``te_x`` with codes 10, 11 and 12; every month is
      predicted separately.
    * ``quarter == 7``: ``te_x17`` with code 22 only; that single prediction
      is reused for all three returned frames (codes 23 and 24 are not
      predicted).

    Args:
        tr_x, tr_y, tr_x17, tr_y17: Passed straight to ``data_quarter``.
        te_x: Test frame used when ``quarter == 3``.
        te_x17: Test frame used when ``quarter == 7``.
        quarter: Must be 3 or 7.

    Returns:
        A tuple of three DataFrames, each ``pid.to_frame()`` (``pid`` being
        the fifth value returned by ``data_quarter``) with the predictions
        added as column ``f_knn``.
        NOTE(review): ``assign`` needs ``pid`` to have one entry per test
        row -- confirm against ``data_quarter``.

    Raises:
        ValueError: If ``quarter`` is neither 3 nor 7.

    Side effects:
        Reseeds NumPy's global RNG with 0 and prints the training MAE
        (plus ``'training all done!'`` for quarter 7).
    """
    # Per-quarter settings: the test frame to score, the month codes written
    # into its 'date' column, and whether each month gets its own predict().
    if quarter == 3:
        test_frame, months, predict_each_month = te_x, (10, 11, 12), True
    elif quarter == 7:
        test_frame, months, predict_each_month = te_x17, (22, 23, 24), False
    else:
        # Fail early and clearly: without this check no estimator is ever
        # created and the function dies later with an UnboundLocalError.
        raise ValueError('quarter must be 3 or 7, got {!r}'.format(quarter))

    tr_x, tr_y, _, _, pid = data_quarter(tr_x, tr_y, tr_x17, tr_y17, quarter,
                                         False)
    # ``.values`` is used instead of ``as_matrix()``, which recent pandas
    # releases no longer provide.
    tr_x = MinMaxScaler().fit_transform(tr_x.values)

    if not predict_each_month:
        # Only the first month is ever predicted, so skip scaling the rest.
        months = months[:1]

    # rename() hands back a new frame, so the writes below do not touch the
    # frame owned by the caller.
    test_frame = test_frame.rename(columns={'parcelid': 'date'})
    test_matrices = []
    for month in months:
        test_frame.loc[:, 'date'] = month
        # NOTE(review): every test matrix is scaled with its own freshly
        # fitted MinMaxScaler instead of the one fitted on the training
        # data. A constant column such as 'date' has zero range, so it
        # presumably scales to the same value for every month code, which
        # would make the per-month matrices identical. Kept as-is to
        # preserve current predictions -- confirm whether the training
        # scaler should be reused here.
        test_matrices.append(MinMaxScaler().fit_transform(test_frame.values))

    tr_y = np.squeeze(tr_y.values)

    np.random.seed(0)
    clf = KNeighborsRegressor(n_neighbors=int(np.sqrt(tr_x.shape[0])),
                              weights='distance', p=1)
    if quarter == 7:
        # Emitted before fit(), exactly where it has always been printed.
        print('training all done!')
    clf.fit(tr_x, tr_y)

    preds = [clf.predict(matrix) for matrix in test_matrices]
    if not predict_each_month:
        # Reuse the single prediction for all three output frames.
        preds = preds * 3

    pred_train = clf.predict(tr_x)
    print('train mae score: {}'.format(mean_absolute_error(tr_y, pred_train)))

    return tuple(pid.to_frame().assign(f_knn=pred) for pred in preds)
def model_pred(tr_x, tr_y, te_x, tr_x17, tr_y17, te_x17, quarter, clf):
    """Fit ``clf`` on one quarter's training data and predict three months.

    The training set is whatever ``data_quarter(tr_x, tr_y, tr_x17, tr_y17,
    quarter, False)`` returns; its features are min-max scaled before
    ``clf.fit``. The selected test frame (``te_x`` for ``quarter == 3``,
    ``te_x17`` for ``quarter == 7``) has its ``parcelid`` column renamed to
    ``date`` and overwritten with the month codes 10, 11 and 12 in turn; each
    resulting frame is scaled and predicted separately.

    Args:
        tr_x, tr_y, tr_x17, tr_y17: Passed straight to ``data_quarter``.
        te_x: Test frame used when ``quarter == 3``.
        te_x17: Test frame used when ``quarter == 7``.
        quarter: Must be 3 or 7.
        clf: Estimator exposing ``fit(X, y)`` and ``predict(X)``; it is
            fitted in place.

    Returns:
        A tuple of three DataFrames, each ``pid.to_frame()`` (``pid`` being
        the fifth value returned by ``data_quarter``) with the predictions
        added as column ``f_svm``.
        NOTE(review): ``assign`` needs ``pid`` to have one entry per test
        row -- confirm against ``data_quarter``.

    Raises:
        ValueError: If ``quarter`` is neither 3 nor 7.

    Side effects:
        Reseeds NumPy's global RNG with 0, fits ``clf`` and prints the
        training MAE.
    """
    if quarter == 3:
        test_frame = te_x
    elif quarter == 7:
        test_frame = te_x17
    else:
        # Fail early and clearly: without this check ``clf`` is used without
        # being fitted in this call and the predictions are never defined.
        raise ValueError('quarter must be 3 or 7, got {!r}'.format(quarter))

    # NOTE(review): quarter 7 uses the same month codes as quarter 3 --
    # confirm that 10/11/12 is really intended for the ``te_x17`` frame.
    months = (10, 11, 12)

    tr_x, tr_y, _, _, pid = data_quarter(tr_x, tr_y, tr_x17, tr_y17, quarter,
                                         False)
    # ``.values`` is used instead of ``as_matrix()``, which recent pandas
    # releases no longer provide.
    tr_x = MinMaxScaler().fit_transform(tr_x.values)

    # rename() hands back a new frame, so the writes below do not touch the
    # frame owned by the caller.
    test_frame = test_frame.rename(columns={'parcelid': 'date'})
    test_matrices = []
    for month in months:
        test_frame.loc[:, 'date'] = month
        # NOTE(review): every test matrix is scaled with its own freshly
        # fitted MinMaxScaler instead of the one fitted on the training
        # data. A constant column such as 'date' has zero range, so it
        # presumably scales to the same value for every month code, which
        # would make the three matrices identical. Kept as-is to preserve
        # current predictions -- confirm whether the training scaler should
        # be reused here.
        test_matrices.append(MinMaxScaler().fit_transform(test_frame.values))

    tr_y = np.squeeze(tr_y.values)

    np.random.seed(0)
    clf.fit(tr_x, tr_y)
    preds = [clf.predict(matrix) for matrix in test_matrices]

    pred_train = clf.predict(tr_x)
    print('train mae score: {}'.format(mean_absolute_error(tr_y, pred_train)))

    return tuple(pid.to_frame().assign(f_svm=pred) for pred in preds)