-
Notifications
You must be signed in to change notification settings - Fork 0
/
hw_28_vlasov_ivan_1577480224.py
72 lines (56 loc) · 2.01 KB
/
hw_28_vlasov_ivan_1577480224.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
"""Untitled6.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/19GtMkn-krgPsP1wxEEqNqxRwbD7FdLRZ
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the credit dataset from the Excel workbook and preview the first 10 rows.
df = pd.read_excel("credits_100.xlsx")
print(df.head(10))

# Pairwise plots for three columns: regression fits off the diagonal
# (kind='reg'), kernel density estimates on the diagonal (diag_kind='kde').
sns.pairplot(
    df,
    vars=['salary', 'credit', 'married'],
    kind='reg',
    diag_kind='kde',
    height=3,
    aspect=1.5,
)

# Print the pairwise correlation table for the selected columns.
print()
print('корреляция между числовыми признаками')
correlation_columns = ['age', 'salary', 'married', 'children', 'credit']
corr = df[correlation_columns].corr()
print(corr)
# Draw the correlation matrix as a heatmap showing only the lower triangle.
#
# Open a fresh figure first: sns.heatmap draws on the current axes, and when
# this file runs as a plain script the current axes would otherwise be one of
# the subplots of the previously created figure.
plt.figure()
# Use the builtin `bool`: the `np.bool` alias was removed in NumPy 1.24 and
# raises AttributeError there.
mask = np.zeros_like(corr, dtype=bool)
# Mark the upper triangle, diagonal included, so those cells are hidden.
mask[np.triu_indices_from(mask)] = True
sns.heatmap(corr, mask=mask, cmap=sns.light_palette('grey'))
# 3D scatter plot of salary (x) vs. married (y) vs. credit (z).
# NOTE(review): the axes3d import is unused by name; presumably kept so that
# the '3d' projection is registered on older matplotlib versions - confirm.
from mpl_toolkits.mplot3d import axes3d
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
# NOTE(review): the label is set but no legend is drawn in this script.
ax.scatter(
    df['salary'],
    df['married'],
    df['credit'],
    marker='o',
    label='blue',
)
# Horizontal box plots of salary and credit on a dedicated wide figure,
# followed by displaying every figure created so far.
boxplot_columns = ['salary', 'credit']
plt.figure(figsize=(16, 6))
df.boxplot(vert=False, column=boxplot_columns)
plt.show()
# Prepare the data for the regression model: predict salary from the
# remaining columns.
print('построение регрессионно модели')
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

feature_columns = ['credit', 'age', 'married', 'children']
X = df[feature_columns]
y = df['salary']

# Hold out 15% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=80,
)

# Preview the first five held-out rows and their targets.
print(X_test[:5])
print(y_test[:5])
# Train the regression.
regressor = LinearRegression()
regressor.fit(X_train, y_train)
print('коэф-ты регрессии')
print(regressor.coef_)

# Predicted y values for X_test.
y_pred = regressor.predict(X_test)

# Print the first ten actual / predicted pairs side by side.
for actual, predicted in zip(y_test[:10], y_pred[:10]):
    print(actual, predicted)

# Per-sample relative error: the absolute difference divided by the mean of
# the actual and predicted values.
E = []
for actual, predicted in zip(y_test, y_pred):
    E.append(abs(actual - predicted) / ((actual + predicted) / 2))
print(E)

# Average relative error over the test set.
avg_E = sum(E) / len(E)
print('оценка точности')
print(f'E среднее = {avg_E:.3f}')

# NOTE(review): per the scikit-learn documentation, LinearRegression.score
# returns the coefficient of determination (R^2), not a classification-style
# accuracy - confirm that the variable name reflects the intended metric.
accuracy = regressor.score(X_test, y_test)
print(accuracy)