/
Data.py
115 lines (92 loc) · 3.44 KB
/
Data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.utils import to_categorical
import pandas as pd
import seaborn as sb
from pylab import rcParams
from pandas import DataFrame
rcParams['figure.figsize'] = 10, 8
sb.set_style('whitegrid')
#Load NN Data from mat file into dict.
mat=scipy.io.loadmat(r'/home/dl2020/Python/NeuralNetwork/NNData.mat')
print(mat.keys())
#returns the NNData value forom dict.
NNData=mat["NNData"]
print(NNData.shape)
#convert list to panda dataframe
df=DataFrame(NNData)
df.columns=['dev._stage','dimple_ang.','radii_ratio','orientation_ang.','area','force']
print(df.head(10))
#Checking for missing values
print(df.isnull().sum())
#print data information
print(df.info())
#Converting categorical variables to a dummy indicators
stage=pd.get_dummies(df['dev._stage'],drop_first=False)
print(stage.head(10))
df.drop(['dev._stage'],axis=1,inplace=True)
df=pd.concat([df,stage],axis=1)
print(df.head())
#rename new columns
df.rename(columns = {1:'ds1'}, inplace = True)
df.rename(columns = {2:'ds2'}, inplace = True)
df.rename(columns = {3:'ds3'}, inplace = True)
#change order of columns
Col_Order = ['dimple_ang.','radii_ratio','orientation_ang.','area','ds1','ds2','ds3','force']
df = df.reindex(columns=Col_Order)
print(df.head())
# Plotting the heatmap of correlation between features for checking the dependency between features
sb.heatmap(df.corr(), annot=True)
plt.show()
#data normalization using mean normalization
#df['dimple_ang.']=(df['dimple_ang.']-df['dimple_ang.'].mean())/df['dimple_ang.'].std()
#df['orientation_ang.']=(df['orientation_ang.']-df['orientation_ang.'].mean())/df['orientation_ang.'].std()
#print(df.head())
#data normalization using min-max normalization
df['dimple_ang.']=(df['dimple_ang.']-df['dimple_ang.'].min())/(df['dimple_ang.'].max()-df['dimple_ang.'].min())
df['orientation_ang.']=(df['orientation_ang.']-df['orientation_ang.'].min())/(df['orientation_ang.'].max()-df['orientation_ang.'].min())
print(df.head())
# Viewing the data statistics
print(df.describe())
X = df.iloc[:,0:7]
y = df.iloc[:,7:8]
#######################################Implementation with List########################
# X=NNData[:,0:5]
# Y=NNData[:,5:]
# print(X.shape)
# print(Y.shape)
# print("\n\n")
# #Data Normalization
# print("maximum dimple angle is", X[:,1:2].max())
# print("maximum orientation angle is", X[:,3:4].max())
# X[:,1:2]=X[:,1:2]/180
# print("maximum normalized dimple angle is", X[:,1:2].max())
# X[:,3:4]=X[:,3:4]/180
# print("maximum normalized orientation angle is", X[:,3:4].max())
# # one-hot encode the developmental stage categorical data
# data=X[:,0:1]
# data = to_categorical(data)
# print(data)
# print(data.shape)
# X=np.hstack((X,data))
# print(X)
# print(X.shape)
# X=np.delete(X,0,1)
# print(X)
# print(X.shape)
#######################################Implementation with List########################
#split data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=5)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)
#converting dataframe to list
X_train_l = X_train.values.tolist()
y_train_l = y_train.values.tolist()
X_test_l = X_test.values.tolist()
y_test_l = y_test.values.tolist()
#save data into mat file
scipy.io.savemat('/home/dl2020/Python/BostonHousing/Data.mat', {'X_train':X_train_l,'y_train':y_train_l,'X_test':X_test_l,'y_test':y_test_l})