Python preProc примеры использования

Язык программирования: Python

Пространство имен/Пакет: utils.dataprocess

Метод/Функция: preProc

Примеров на hotexamples.com: 3

Python preProc - 3 примера найдено. Это лучшие примеры Python кода для utils.dataprocess.preProc, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: carInsurancePredXgboost.py Проект: HandH1998/carInsurancePred

import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics as metrics
import joblib
from utils.dataprocess import preProc, preProcTest, toJson
from sklearn.model_selection import train_test_split, GridSearchCV

# Uses the native xgboost interface.

# Load the training CSV and run it through the project's preprocessing helper.
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain, toNumpy=True)

# weight = (number of negative examples) / (number of positive examples).
# It is computed on the full training set, before the validation split below.
# NOTE(review): assumes yTrain holds 0/1 labels so that summing it counts the
# positives -- confirm against preProc.
positiveCount = sum(yTrain)
negativeCount = yTrain.shape[0] - positiveCount
weight = negativeCount * 1.0 / positiveCount

# Hold out 20% of the rows for validation (no random_state is set here).
xTrain, xValidation, yTrain, yValidation = train_test_split(
    xTrain,
    yTrain,
    test_size=0.2,
)

# Wrap both splits in DMatrix objects for the native xgboost API.
dtrain = xgb.DMatrix(xTrain, label=yTrain)
dvalidation = xgb.DMatrix(xValidation, label=yValidation)
params = {
    # xgboost宏观特征参数
    'booster': 'gbtree',
    'nthread': 5,  # 线程数
    'silent': 0,  # 为1时，静默开启

    # booster参数
    'eta': 0.1,  # learning rate 通过减少每一步的权重，提高鲁棒性
    'gamma': 0.1,  # 节点分裂所需要的最小损失函数下降值
    'max_depth': 9,  # 最大树高，限制过拟合
    'lambda': 2,  # 权重的L2正则项
    'alpha': 1,  # 权重的L1正则项

Пример #2

Показать файл

Файл: carInsurancePred.py Проект: HandH1998/carInsurancePred

        '''
        for i in range(self.layer_num):
            x = self.relu[i](self.bns[i](self.hiddens[i](x)))
        x = self.predict(x)
        return x


# Read in the data.
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')

# The classes are imbalanced, so per-class weights are used: the first entry
# is the weight for the negative class, the second for the positive class.
# The negative-class weight is sum(Response) / row count; the positive-class
# weight is its complement.
responseTotal = sum(dataTrain['Response'])
weight_negative = responseTotal * 1.0 / dataTrain.shape[0]
# Values noted by the original author: [0.12293666666666667, 0.8770633333333333]
weights = [weight_negative, 1 - weight_negative]
# PyTorch requires the weights to be passed as a tensor.
weights = torch.from_numpy(np.array(weights)).type(torch.FloatTensor)

# Preprocess into tensors and persist the full (unsplit) tensors to disk.
xTrain, yTrain, scaler = preProc(dataTrain, toTensor=True)
torch.save(xTrain, 'xTrain.pt')
torch.save(yTrain, 'yTrain.pt')

# Hold out 20% of the rows for validation with a fixed seed.
xTrain, xValidation, yTrain, yValidation = train_test_split(
    xTrain, yTrain, test_size=0.2, random_state=1)

# Batch the training split; BATCH_SIZE is defined outside this excerpt.
torch_dataset = Data.TensorDataset(xTrain, yTrain)
loader = Data.DataLoader(dataset=torch_dataset, batch_size=BATCH_SIZE, shuffle=True)

# NOTE(review): the meaning of the four positional arguments is defined by
# Net, whose constructor is not visible here -- confirm before changing them.
net = Net(14, 15, 2, 3)
print(net)

Пример #3

Показать файл

from sklearn.svm import SVC
import numpy as np
import pandas as pd
import joblib
import time
from utils.dataprocess import preProc, preProcTest, toJson
import sklearn.metrics as metrics

# Start time, used for the elapsed-time report on the last line.
t1 = time.time()

# Training data processing.
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain)

# Build the model.
max_iter = 100000000
# Original author's notes (translated):
#   - Passing class_weight='balanced' to the model is equivalent to
#     multiplying the positive and negative examples by the weights
#     sum(negatives) and sum(positives) respectively.
#   - fit() has a sample_weight parameter that assigns a weight to every
#     sample; it is an array whose length equals the number of samples.
#   - The two serve the same purpose, so only one of them is used.
model = SVC(
    C=1.0,
    kernel='rbf',
    gamma='auto',
    tol=0.2,
    cache_size=1024,
    class_weight='balanced',
    max_iter=max_iter,
)
model.fit(xTrain, yTrain, sample_weight=None)

# Every metric below is computed on the same data the model was fitted on.
score = model.score(xTrain, yTrain)
print('Score:', score)

pred_y = model.predict(xTrain)
fscore = metrics.f1_score(yTrain, pred_y)
print('Fvalue:', fscore)

# Number of samples predicted as class 1.
equal1count = sum(pred_y == 1)
print('预测结果为1的数量:', equal1count)
print(pred_y)

# Persist the fitted model.
joblib.dump(model, 'carInsurancePredSVM.model')

# Reports the configured max_iter cap (not the number of iterations actually
# run) together with the elapsed wall-clock seconds.
print('迭代次数', max_iter, '耗时:', time.time() - t1)