forked from gabrielgarza/exoplanet-deep-learning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess_data.py
54 lines (44 loc) · 1.96 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import pandas as pd
import numpy as np
from scipy import ndimage, fft
from sklearn.preprocessing import normalize, StandardScaler, MinMaxScaler
class LightFluxProcessor:
    """Configurable preprocessing pipeline for light-flux feature matrices.

    Each stage can be switched on or off through the constructor flags. The
    stages run in a fixed order: Fourier transform, row normalization,
    Gaussian smoothing, then standardization.

    Args:
        fourier: Replace every row with the first half of its FFT magnitude
            spectrum.
        normalize: Rescale rows with ``sklearn.preprocessing.normalize``.
        gaussian: Smooth the data with a Gaussian filter (``sigma=10``).
        standardize: Standardize features with a ``StandardScaler`` fitted
            on the training set only.
    """

    def __init__(self, fourier=True, normalize=True, gaussian=True, standardize=True):
        self.fourier = fourier
        self.normalize = normalize
        self.gaussian = gaussian
        self.standardize = standardize

    def fourier_transform(self, X):
        """Return the magnitude of the discrete Fourier transform of ``X``.

        Args:
            X: One-dimensional sequence of samples exposing ``.size``
                (e.g. a NumPy array or a pandas Series).

        Returns:
            numpy.ndarray with ``X.size`` non-negative magnitudes.
        """
        # ``from scipy import fft`` binds the ``scipy.fft`` *module* on current
        # SciPy releases, which is not callable; NumPy's FFT function is used
        # instead.
        return np.abs(np.fft.fft(X, n=X.size))

    def _half_spectrum(self, data):
        """Apply ``fourier_transform`` to every row and keep the first half."""
        # Work on the raw 2-D array: ``DataFrame.apply(..., axis=1)`` returns a
        # Series of arrays for array-valued functions on current pandas, which
        # cannot be column-sliced afterwards.
        spectrum = np.apply_along_axis(self.fourier_transform, 1, np.asarray(data))
        return spectrum[:, : spectrum.shape[1] // 2]

    def process(self, df_train_x, df_dev_x):
        """Run the enabled preprocessing stages on the train and dev features.

        Args:
            df_train_x: Training features, one sample per row.
            df_dev_x: Dev/validation features with the same columns.

        Returns:
            Tuple ``(train_x, dev_x)`` of the processed feature sets. The
            concrete container (ndarray or DataFrame) is whatever the last
            enabled stage produces.
        """
        # Apply fourier transform
        if self.fourier:
            print("Applying Fourier...")
            # Keep first half of data as it is symmetrical after the transform
            df_train_x = self._half_spectrum(df_train_x)
            df_dev_x = self._half_spectrum(df_dev_x)

        # Normalize
        if self.normalize:
            print("Normalizing...")
            df_train_x = pd.DataFrame(normalize(df_train_x))
            df_dev_x = pd.DataFrame(normalize(df_dev_x))

        # Gaussian filter to smooth out data
        if self.gaussian:
            print("Applying Gaussian Filter...")
            # NOTE(review): a scalar sigma is applied along every axis, so the
            # 2-D filter also blends values across rows (samples). Kept as-is
            # to preserve existing outputs -- confirm this is intended.
            df_train_x = ndimage.gaussian_filter(df_train_x, sigma=10)
            df_dev_x = ndimage.gaussian_filter(df_dev_x, sigma=10)

        if self.standardize:
            # Standardize X data; the scaler is fitted on the training set only
            # and then reused for the dev set.
            print("Standardizing...")
            std_scaler = StandardScaler()
            df_train_x = std_scaler.fit_transform(df_train_x)
            df_dev_x = std_scaler.transform(df_dev_x)

        print("Finished Processing!")
        return df_train_x, df_dev_x