Exemplo n.º 1
0

# Normalize the data. Note that Normalizer rescales each sample (row)
# independently to unit norm (L2 by default); it does not scale per feature column.
from sklearn.preprocessing import Normalizer
# Cast X to float first, then overwrite X with the normalized result.
X=Normalizer().fit_transform(X.astype(float))


# Apply a log transform to the features.
from numpy import log1p
from sklearn.preprocessing import FunctionTransformer
# FunctionTransformer wraps log1p (element-wise log(1 + x)) as a transformer;
# X is overwritten with the transformed values.
X=FunctionTransformer(log1p).fit_transform(X)


# Standardize the features (StandardScaler: zero mean and unit variance per column).
from sklearn.preprocessing import StandardScaler
# NOTE(review): the return value is not assigned, so the standardized array is
# discarded. Presumably this is a demonstration of the call; assign the result
# if it is meant to be used downstream.
StandardScaler().fit_transform(X)

# Rescale the features to a fixed interval (MinMaxScaler maps each column to
# [0, 1] by default).
from sklearn.preprocessing import MinMaxScaler
# NOTE(review): the return value is not assigned, so the rescaled array is
# discarded. Presumably this is a demonstration of the call; assign the result
# if it is meant to be used downstream.
MinMaxScaler().fit_transform(X)


# Binarize quantitative features: values greater than the threshold (3) become 1,
# all other values become 0.
from sklearn.preprocessing import Binarizer
# NOTE(review): unlike the steps above, this operates on iris.data rather than X,
# and `iris` is not defined in the visible code — presumably a dataset loaded
# elsewhere; confirm. The return value is not assigned, so the result is discarded.
Binarizer(threshold=3).fit_transform(iris.data)


# K-fold cross-validation.
from sklearn.model_selection  import cross_val_score
# cv=5 requests 5 folds; cross_val_score returns one score per fold.
# `model` and `y` are not defined in the visible code and must exist before this runs.
score = cross_val_score(model, X,y, cv=5)
# Print the array of per-fold scores.
print("score=",score)
Exemplo n.º 2
0
# Normalize each row of test_ct to unit norm, then predict with the fitted
# classifier `clf` (defined outside the visible code).
ct_pred = clf.predict(Normalizer().fit_transform(test_ct))

# Build a two-column table: 'ID' taken from `kid`, 'Expected' from the predictions.
res = pd.DataFrame({'ID': kid, 'Expected': ct_pred})
# Write the table to outcome.csv; index=False leaves the DataFrame row index out.
res.to_csv('outcome.csv', index=False)

# ===============LGBM count (more tuning needed)========================================================
# Cast the labels to int and subtract 1.
# NOTE(review): presumably this maps labels 1..5 onto the zero-based class ids
# 0..4 used with 'num_class': 5 in `param` below — confirm the original label range.
star = star.astype(int) - 1

# Hold out 20% of the rows for validation. stratify=star keeps the class
# proportions the same in both parts, and the fixed random_state makes the
# shuffled split reproducible.
xtrain_ct, xvalid_ct, ytrain, yvalid = train_test_split(train_ct,
                                                        star,
                                                        stratify=star,
                                                        random_state=960724,
                                                        test_size=0.2,
                                                        shuffle=True)

# Wrap the training split as a LightGBM Dataset, with features cast to float64.
lg_train = lgb.Dataset(xtrain_ct.astype(np.float64), label=ytrain)
# create_valid builds the validation Dataset aligned with lg_train (it uses the
# training Dataset as its reference) rather than as an independent Dataset.
lg_val = lg_train.create_valid(xvalid_ct.astype(np.float64), label=yvalid)

param = {
    'objective': 'multiclass',
    'task': 'train',
    'num_threads': 4,
    'seed': 960724,
    'early_stopping_round': 50,
    'verbosity': 1,
    'num_class': 5,
    'learning_rate': 0.1,
    'num_leaves': 31,
    'max_depth': -1,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8