Exemplo n.º 1
0
class Model:
    """Ridge-regression pipeline: feature extraction, preprocessing, fit, predict.

    The fitted transformers and the fitted model are pickled to the current
    working directory ('xscaler+ve.pkl', 'yscaler+ve.pkl', 'poly.pkl',
    'model+ve.pkl') as a side effect of `preprocess` and `fitmodel`.
    """

    # NOTE: these estimators are class attributes, so every Model instance
    # (and the class itself) shares the same fitted objects.
    lin_reg = Ridge()
    xscaler = sc()
    yscaler = sc()
    poly = PolynomialFeatures(degree=3)

    def extract(self, df):
        """Split *df* into a feature frame and the 'Score' target.

        The identifier/location columns and 'Score' itself are dropped from
        the features. Returns the tuple ``(x, y)``.
        """
        y = df['Score']
        x = df.drop(
            ['labour_id', 'Name', 'skill', 'Latitudes', 'Longitudes', 'Score'],
            axis=1)
        return x, y

    def preprocess(self, x, y):
        """Fit the transformers on the training data and return scaled x, y.

        To be called on the output of `extract`. *x* must contain a 'Gender'
        column. Returns ``(x, y)`` where x is a DataFrame of scaled
        polynomial features and y is a scaled column vector of shape
        ``(len(y), 1)``. The fitted scalers and polynomial expander are
        pickled to disk.
        """
        gen = x['Gender']
        x = self.poly.fit_transform(x)
        x = self.xscaler.fit_transform(x)
        # The scaler needs a 2-D column vector for the target.
        y = np.array(y).reshape((len(y), 1))
        y = self.yscaler.fit_transform(y)
        x = pd.DataFrame(x)
        # Column 0 is overwritten with the raw 'Gender' values.
        # NOTE(review): pandas aligns this assignment on the index, so a
        # non-default index on the input frame would yield NaNs - confirm.
        # NOTE(review): preprocess_input() does not apply this overwrite.
        x[0] = gen
        # Context managers make sure each pickle file is flushed and closed.
        with open('xscaler+ve.pkl', 'wb') as fh:
            dump(self.xscaler, fh)
        with open('yscaler+ve.pkl', 'wb') as fh:
            dump(self.yscaler, fh)
        with open('poly.pkl', 'wb') as fh:
            dump(self.poly, fh)
        return x, y

    def preprocess_input(self, x):
        """Apply the already-fitted polynomial expansion and x-scaling to *x*."""
        x = self.poly.transform(x)
        x = self.xscaler.transform(x)
        return x

    def fitmodel(self, x, y):
        """Fit the ridge model on (x, y), pickle it to 'model+ve.pkl', print it."""
        k = self.lin_reg.fit(x, y)
        with open('model+ve.pkl', 'wb') as fh:
            dump(self.lin_reg, fh)
        print(k)

    def predict(self, x):
        """Predict for preprocessed *x* and map the result back to the
        original target scale via the fitted y-scaler."""
        y_pred = self.lin_reg.predict(x)
        return self.yscaler.inverse_transform(y_pred)
Exemplo n.º 2
0
    def preprocess(self, x, type):
        """Expand and scale *x*, then restore the raw 'Gender' values in column 0.

        *type* selects the fitted feature scaler: '+' uses
        ``self.xscaler_positive``, '-' uses ``self.xscaler_negative``. Any
        other value falls back to a freshly constructed (unfitted) ``sc()``.
        Returns a DataFrame.
        """
        gender_column = x['Gender']

        if type == '+':
            feature_scaler = self.xscaler_positive
        elif type == '-':
            feature_scaler = self.xscaler_negative
        else:
            feature_scaler = sc()

        expanded = self.poly.transform(x)
        frame = pd.DataFrame(feature_scaler.transform(expanded))
        frame[0] = gender_column
        return frame
Exemplo n.º 3
0
    def predict(self, x, type, indexes):
        """Predict scores for *x* and return them keyed by identifier.

        *type* selects the fitted model/target-scaler pair: '+' for the
        positive pair, '-' for the negative pair. Any other value falls back
        to a new, unfitted ``Ridge()`` and ``sc()``. *indexes* maps the row
        position ``0..len(indexes)-1`` to the key used in the returned dict.
        """
        if type == '+':
            regressor, target_scaler = self.model_positive, self.yscaler_positive
        elif type == '-':
            regressor, target_scaler = self.model_negative, self.yscaler_negative
        else:
            regressor, target_scaler = Ridge(), sc()

        scores = target_scaler.inverse_transform(regressor.predict(x))
        # Each prediction row is indexed at [0] to pull out the score value.
        return {indexes[pos]: scores[pos][0] for pos in range(len(indexes))}
Exemplo n.º 4
0
    def preprocess_input(self, x, type):
        """Turn a dict of feature lists into a scaled feature matrix.

        *x* is a dictionary whose values are feature rows. Returns
        ``(features, indexes)`` where *indexes* maps each row position to the
        dictionary key that row came from. *type* picks the fitted scaler
        ('+' or '-'); any other value falls back to an unfitted ``sc()``.
        """
        # Row position -> original key, in the dict's iteration order.
        indexes = dict(enumerate(x))
        rows = np.array(list(x.values()))

        expanded = self.poly.transform(rows)

        if type == '+':
            feature_scaler = self.xscaler_positive
        elif type == '-':
            feature_scaler = self.xscaler_negative
        else:
            feature_scaler = sc()

        return feature_scaler.transform(expanded), indexes
Exemplo n.º 5
0
# Label-encode column 2 of x in place with the encoder le_x_2
# (le_x_2 and x are defined earlier, outside this excerpt).
x[:, 2] = le_x_2.fit_transform(x[:, 2])
# One-hot encode column 1; the column index is passed to the encoder.
# NOTE(review): the `categorical_features` argument looks like the legacy
# scikit-learn OneHotEncoder API - confirm the installed version accepts it.
ohec = ohe(categorical_features=[
    1
])
x = ohec.fit_transform(x).toarray()
# Drop the first column of the encoded matrix to avoid the dummy-variable
# trap (for k categories, k-1 dummy columns are enough; the last is implied).
x = x[:,
      1:]

# Data splitting: 80% train / 20% test with a fixed random seed.
from sklearn.model_selection import train_test_split as tts
x_train, x_test, y_train, y_test = tts(x, y, test_size=0.2, random_state=0)

# Feature scaling: fit the standard scaler on the training set only and
# reuse the same fitted scaler on the test set.
from sklearn.preprocessing import StandardScaler as sc
sc_x = sc()
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

import keras as ke
from keras.models import Sequential  # to initialize the ANN
from keras.layers import Dense  # to build the layers of the ANN
from keras.layers import Dropout as dr

# Initialize the ANN as a Sequential model (a sequence of layers).
classifier = Sequential(
)

#adding the input layer and first hidden layer
classifier.add(
    Dense(activation="relu",
Exemplo n.º 6
0
def preprocess_normalize(df):
    """Scale the quantitative and numerical column groups of *df* in place.

    Each group (module-level `quantitative_data`, then `numerical_data`) is
    fitted and transformed with its own new `sc()` instance. The same,
    mutated DataFrame is returned.
    """
    scaled_quantitative = sc().fit_transform(df[quantitative_data])
    df[quantitative_data] = scaled_quantitative

    scaled_numerical = sc().fit_transform(df[numerical_data])
    df[numerical_data] = scaled_numerical

    return df
Exemplo n.º 7
0
import numpy as np  # numerical library
import pandas as pd  # data-analysis library

# Load the data set; the last column is taken as the target Y and all
# remaining columns as the feature matrix X.
filepath = "C:/Users/Condominios Manzano/Desktop/Machine Learning/python-ml-course-master/python-ml-course-master/datasets/iris/iris.csv"
df = pd.read_csv(filepath)
Y = df.iloc[:, -1]
X = df.iloc[:, :-1]
# Build PCA from scratch.
# Three ways to obtain the eigenvectors and eigenvalues:
# 1) Variance-covariance matrix
# 2) Correlation matrix
# 3) Singular value decomposition

# For method 1 the data is standardized first, because the features are
# measured in different units.
# NOTE(review): `sc` is not imported in this excerpt - presumably
# sklearn's StandardScaler; confirm.

X_std = sc().fit_transform(X)
x_ = np.mean(X_std, axis=0)  # per-column mean of the standardized data
# 1) Variance-covariance matrix: centered cross-product divided by (n - 1),
#    followed by its eigen-decomposition.
cov_ = (1 / (len(X) - 1)) * ((X_std - x_).T @ (X_std - x_))
ceig_val, ceig_vec = np.linalg.eig(cov_)

# 2) Correlation matrix of the standardized data and its eigen-decomposition;
#    `cor` is the correlation matrix of the raw (unstandardized) features.
corr_ = np.corrcoef(X_std.T)
eig_val, eig_vec = np.linalg.eig(corr_)
cor = np.corrcoef(X.T)

# 3) Singular value decomposition of the transposed standardized data.
u, s, v = np.linalg.svd(X_std.T)

# Principal-component selection: keep the vectors that explain most of the
# data. The sum of the covariance eigenvalues is computed here.
sum_com = ceig_val.sum()
Exemplo n.º 8
0
# Test split: rows from 2375 onward; columns 1..124 are the features and
# columns 125..127 are the targets.
test_x_set = dataset.iloc[2375:, 1:125].values
test_y_set = dataset.iloc[2375:, 125:128].values


# Taking care of missing data: replace each NaN with its column's mean
# (np.nanmean ignores NaNs; inds[1] holds the column index of every NaN).
# training_x_set is defined earlier, outside this excerpt.
col_mean = np.nanmean(training_x_set, axis=0)
inds = np.where(np.isnan(training_x_set))
training_x_set[inds] = np.take(col_mean, inds[1])

# Same imputation for the test set, using the test set's own column means.
col_mean2 = np.nanmean(test_x_set, axis=0)
inds = np.where(np.isnan(test_x_set))
test_x_set[inds] = np.take(col_mean2, inds[1])


# Data scaling: fit the scaler on the training features and transform them.
# NOTE(review): the original comment says "(0-1)", but which scaler `sc`
# refers to is not visible here - confirm.
# NOTE(review): test_x_set is not transformed with `scaled` in this
# excerpt - verify it happens later.
scaled = sc()
training_x_set = scaled.fit_transform(training_x_set)

#%%

# Build the learning model: a stack of fully connected ReLU layers with
# dropout between the later layers.
model = Sequential()
# Input size == ((6*9)+(8*1)) * 2 == 124
model.add(Dense(100, input_dim = 124, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.1))
model.add(Dense(100, activation = 'relu'))
model.add(Dropout(0.2))
model.add(Dense(80, activation = 'relu'))
model.add(Dropout(0.2))
Exemplo n.º 9
0
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler as sc
from sklearn.decomposition import PCA
from scipy import linalg as alg
# Load the data and standardize every column.
r = pd.read_csv('bank_contacts.csv')
r_std = sc().fit_transform(r)
"""
method to find covariance matrix manually
mean_vec=np.mean(r_std,axis=0)
cov_mat=(r_std-mean_vec).T.dot((r_std-mean_vec)) / (r_std.shape[0]-1)
print("covariance matrix:",cov_mat)
"""
# Covariance matrix via np.cov (variables in rows, hence the transpose),
# followed by its eigen-decomposition.
cov_mat = np.cov(r_std.T)
eig_val, eig_vec = alg.eig(cov_mat)
print(eig_val)
print(eig_vec)
# For visually confirming the eigen pairs: pair each eigenvalue's absolute
# value with its eigenvector (a column of eig_vec), then print the values.
eig_pairs = [(np.abs(eig_val[i]), eig_vec[:, i]) for i in range(len(eig_val))]
for i in eig_pairs:
    print(i[0])

# Two-component PCA. It is first fitted on the raw data `r` to print the
# explained-variance ratios ...
t = PCA(n_components=2)
t.fit(r)
print(t.explained_variance_ratio_)
## for cumulative variance
#print(t.explained_variance_ratio_.sum())
# ... and then refitted on the standardized data, which is what gets
# projected. `u` is the value returned by fit().
u = t.fit(r_std)
f = t.transform(r_std)
Exemplo n.º 10
0
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from sklearn.preprocessing import MinMaxScaler as sc
import matplotlib.pyplot as plt
from keras.models import model_from_json

# Training data: load the CSV and record the number of rows.
# NOTE(review): `pd` is not imported in this excerpt - confirm it is
# imported earlier in the file.
training_data = pd.read_csv('AMZNtrain.csv')
num_samples = training_data.shape[0]

# Feature columns: everything from column 1 up to (not including) the
# second-to-last column.
trainX = training_data.iloc[:, 1:-2]

# Scale each dimension/feature to [0, 1] with its own MinMaxScaler.
# Assumes trainX has four feature columns - TODO confirm against the CSV.
list_sc = [sc(feature_range=(0, 1)) for i in range(4)]

# Fit scaler i on column i (reshaped to a single 2-D column) and write the
# scaled values back into trainX.
for i in range(4):
    trainX.iloc[:,
                i] = list_sc[i].fit_transform(trainX.iloc[:, i].values.reshape(
                    num_samples, 1))

trainX_scaled = trainX.values

# Sliding-window length; X_train / y_train are filled by the loop below.
lookback = 60
X_train = []
y_train = []
for i in range(lookback, num_samples):
    # Grab all features, i.e open, high, low, and close
    X_train.append(trainX_scaled[i - lookback:i])
    # I want to predict the 'close' price, i.e. the last column
Exemplo n.º 11
0
@author: Thineth
"""

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Load the data set (path is relative to the working directory).
dataset = pd.read_csv('Position_salaries.csv')

# NOTE(review): X and y are both read from column index 2, so the model is
# trained to predict a column from itself - confirm the intended feature
# column.
X = dataset.iloc[:, 2:3].values
y = dataset.iloc[:, 2].values

from sklearn.preprocessing import StandardScaler as sc

# Separate scalers for the feature and the target so the prediction can be
# mapped back to the original target scale.
sc_X = sc()
sc_y = sc()

X = sc_X.fit_transform(X)
# StandardScaler expects 2-D input: scale y as a single column, then flatten
# it back to the 1-D shape used for fitting the regressor.
y = sc_y.fit_transform(y.reshape(-1, 1)).ravel()

from sklearn.svm import SVR

regressor = SVR(kernel='rbf')

regressor.fit(X, y)

# Scale the query point with the statistics learned from the training data.
# Refitting the scaler here (fit_transform) would standardize the single
# sample against itself and overwrite the fitted scaler.
y_pred = regressor.predict(sc_X.transform([[6.5]]))
# inverse_transform also needs 2-D input; flatten the result back to 1-D.
y_pred = sc_y.inverse_transform(y_pred.reshape(-1, 1)).ravel()

plt.scatter(X, y, color='red')
Exemplo n.º 12
0
from sklearn.preprocessing import Imputer, LabelEncoder, OneHotEncoder, StandardScaler as sc
from sklearn.cross_validation import train_test_split

# Import the data set: every column except the last is a feature; column
# index 3 is the target.
# NOTE(review): `pd` is not imported in this excerpt - confirm it is
# imported earlier in the file.
dataset = pd.read_csv('datasets/Data.csv')
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:,3].values

# Handle missing data: replace NaNs in feature columns 1 and 2 with the
# column mean.
# NOTE(review): `Imputer`, `sklearn.cross_validation` and the
# `categorical_features` argument used below look like legacy scikit-learn
# APIs - confirm they exist in the installed version.
imputer = Imputer(missing_values = 'NaN', strategy = 'mean', axis = 0)
imputer = imputer.fit(X[:,1:3])
X[:,1:3] = imputer.transform(X[:,1:3])


# Categorical data handling: label-encode feature column 0, then one-hot
# encode that column.
labelEncoder_X = LabelEncoder()
X[:,0] = labelEncoder_X.fit_transform(X[:,0])

oneHotEncoder = OneHotEncoder(categorical_features = [0])
X = oneHotEncoder.fit_transform(X).toarray()

# Label-encode the target.
labelEncoder_Y = LabelEncoder()
Y = labelEncoder_Y.fit_transform(Y)

# Split the data set into a training set (80%) and a test set (20%) with a
# fixed random seed.
X_train, X_test, Y_Train, Y_Test = train_test_split(X,Y, test_size = 0.2, random_state = 0)

# Feature scaling: fit on the training features only, then apply the same
# fitted scaler to the test features.
sc_X  = sc()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
Exemplo n.º 13
0
# Preprocess time: keep only the part of 'Time' before the first ':' and
# store it as a float column (presumably the hour - verify against the data).
# `float` is used instead of the `np.float` alias, which newer NumPy
# releases no longer provide; the alias referred to the builtin float.
tempTime = cleanData['Time'].str.split(':', expand=True).pop(0)
tempTime = pd.DataFrame(tempTime, dtype=float)
# Replace the original 'Time' column with the numeric one at position 3.
cleanData = cleanData.drop(columns=['Time'])
cleanData.insert(3, 'Time', tempTime)

# Change type to a NumPy array; `c` lists the feature column indices.
data = cleanData.values
c = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13, 14, 15, 16, 17, 18]

# Get features and labels: column 12 is the label, the columns in `c` are
# the features.
label = data[:, 12]
data = data[:, c]

# Scale data.
# NOTE(review): `sc = sc()` and `pca = pca(...)` rebind the imported names
# to instances, so the classes cannot be instantiated again afterwards. The
# names are kept because code outside this excerpt may rely on them.
sc = sc()
data = sc.fit_transform(data)
print(data)
# Reduce the scaled features to 10 principal components.
pca = pca(n_components=10)
fit = pca.fit_transform(data)
data = pd.DataFrame(data=fit)  # data after processing by PCA
# Set up the model (`lr` is presumably LogisticRegression - confirm) and fit
# it on the PCA features with integer labels.
logisticRegressionInstance = lr()
logisticRegressionInstance.fit(data, label.astype('int'))

# Spot-check rows 182100..182113: true labels, model parameters, predicted
# classes and the second column of the predicted probabilities.
print(label[182100:182114])
print(logisticRegressionInstance.get_params())
print(logisticRegressionInstance.predict(data[182100:182114]))
print(logisticRegressionInstance.predict_proba(data[182100:182114])[:, 1])