示例#1
0
from sklearn.metrics import mean_absolute_error


# Fit the project's SimpleLinearRegression2 on a single Boston-housing feature
# and report its error on a held-out test split.

# Load the data set.
# NOTE(review): datasets.load_boston was removed in scikit-learn 1.2 — confirm
# the pinned scikit-learn version still provides it.
boston = datasets.load_boston()
X = boston.data[:, 5]  # column index 5 only — the number of rooms
y = boston.target
# plt.scatter(X, y)
# plt.show()

# Many samples sit at the target's upper cap; keep only those with y < 50.
# X must be filtered first, while y still has its original length.
X = X[y < 50.0]
y = y[y < 50.0]
# plt.scatter(X, y)
# plt.show()

# Train/test split (project-local helper: 20% test, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_ratio=0.2, seed=666)
reg2 = SimpleLinearRegression2()
reg2.fit(X_train, y_train)
print(reg2.a_)
print(reg2.b_)

# Inspect the training data (and, optionally, the fitted line).
plt.scatter(X_train, y_train)
# plt.plot(X_train, reg2.predict(X_train), color='red')
# plt.show()

# Predict on the held-out samples.
y_predict = reg2.predict(X_test)

# MSE and MAE from scikit-learn. The original evaluated these expressions and
# discarded the results, so nothing was reported when run as a script.
print(mean_squared_error(y_test, y_predict))
print(mean_absolute_error(y_test, y_predict))
示例#2
0
# Feature-engineering pipeline on df_treino, followed by fitting a
# GradientBoostingClassifier on the rows where Resultado == 1 and predicting
# on the held-out split.
# The two lines below are a disabled numerical encoding of 'Gerente_Negocio'.
#encoder = encoding.encode_gerente(df_treino,'numerical')
#df_treino.loc[:,'Gerente_Negocio'] = encoder.fit_transform(df_treino['Gerente_Negocio'])

# Project-local transformation steps; each takes the frame and returns the
# transformed frame. The order is kept as written — later steps may depend on
# columns produced by earlier ones (TODO confirm).
df_treino = tratamento.fix_unidade_e_area(df_treino)
df_treino = tratamento.add_features(df_treino)
df_treino = tratamento.drop_unecessary(df_treino)
df_treino = tratamento.get_cidade_e_is_privado(df_treino)
df_treino = tratamento.get_objetivo_client(df_treino)
df_treino = encoding.encoding_foco(df_treino)

# Duplicate the column under a second name before get_dummies — presumably so
# the raw value survives the one-hot encoding; verify against
# encoding.get_dummies. The copy is excluded from the features further down.
df_treino.loc[:, 'Area_Unidade_Negocio2'] = df_treino['Area_Unidade_Negocio']
df_treino = encoding.get_dummies(df_treino)

# Keep only the rows whose Resultado equals 1.
df_treino1 = df_treino[df_treino.Resultado == 1]

# Project-local split helper (not scikit-learn's signature); `res` presumably
# names the target column — verify against model_selection.train_test_split.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    df_treino1, res='ResultadoPrevisao')
# Candidate features: every column of X_train except the names listed below
# (identifiers, the duplicated area column, the target, and
# 'TrimestreResultado').
all_feats = [
    f for f in X_train.columns if f not in [
        'Id_ON', 'Id_Cadencia', 'Area_Unidade_Negocio2', 'ResultadoPrevisao',
        'TrimestreResultado'
    ]
]
# clf is only referenced by the disabled feature-selection call on the next
# line; with that call commented out it is currently unused.
clf = RandomForestClassifier(n_estimators=500, max_features=1, random_state=30)
selected_features = all_feats  #model_selection.feature_selection(clf,X_train[all_feats],y_train,k=40)

model = GradientBoostingClassifier(max_features=0.7, random_state=10)
# NOTE(review): the model is fitted twice. warm_start is not set here, so the
# second fit should retrain from scratch and discard the first one.
# NOTE(review): the second fit uses all of df_treino1; if X_test is a subset
# of df_treino1 (as the split above suggests), the predictions below are made
# on rows the model was trained on — confirm this is intended.
model.fit(X_train[selected_features], y_train)
model.fit(df_treino1[selected_features], df_treino1.ResultadoPrevisao)

# Predict on a copy of the held-out rows.
df = X_test.copy()
predictions = model.predict(df[selected_features])
示例#3
0
# Section 4-3: run kNN through the packaged project modules.
from functions.model_selection import train_test_split
from functions.kNN import KNNClassifier
from sklearn import datasets
import numpy as np

iris = datasets.load_iris()
X = iris.data  # feature matrix
y = iris.target  # label vector
X_train, X_test, y_train, y_test = train_test_split(X, y)
my_knn_clf = KNNClassifier(k=3)
my_knn_clf.fit(X_train, y_train)
# Predictions for the test split (30 of them according to the original note);
# compare them with y_test.
y_predict = my_knn_clf.predict(X_test)

# Accuracy: the element-wise comparison yields booleans (True counts as 1,
# False as 0), so their sum is the number of correct predictions; dividing by
# the number of test samples gives the hit rate. This replaces an explicit
# counting loop. The original evaluated this expression and discarded the
# result, so it is now kept and printed.
accuracy = sum(y_predict == y_test) / len(y_test)
print(accuracy)
示例#4
0
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target

# Binary classification on the three-class iris data: keep only the first two
# classes and the first two features (two features are easy to plot).
# Both masks are evaluated against the unfiltered y before rebinding.
X, y = X[y < 2, :2], y[y < 2]

# Use the project's own split helper and logistic-regression implementation.
from functions.model_selection import train_test_split
from functions.LogisticRegression import LogisticRegression

X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)


def x2(x1):
    """Return the second feature value on the fitted decision boundary.

    Solves ``coef_[0] * x1 + coef_[1] * x2 + intercept_ = 0`` for ``x2``
    using the module-level ``log_reg`` model.
    """
    weight_1, weight_2 = log_reg.coef_[0], log_reg.coef_[1]
    numerator = -weight_1 * x1 - log_reg.intercept_
    return numerator / weight_2


# Sample the boundary at 1000 evenly spaced x1 values between 4 and 8.
x1_plot = np.linspace(4, 8, 1000)
x2_plot = x2(x1_plot)

# Draw the boundary line, then the samples of each class in their own colour.
plt.plot(x1_plot, x2_plot)
for label, color in ((0, "red"), (1, "blue")):
    plt.scatter(X[y == label, 0], X[y == label, 1], color=color)
plt.show()