import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
# load_analytic_data and save_sklearn_model are project helper functions
# (defined elsewhere in the repository).

dataset = load_analytic_data("dataset.csv")

# Encoding the labels
genres = dataset.iloc[:, -1]  # Last column
encoder = LabelEncoder()
labels = encoder.fit_transform(genres)

# Scaling the features
scaler = StandardScaler()  # MinMaxScaler() can also be used
features = scaler.fit_transform(np.array(dataset.iloc[:, :-1], dtype=float))

# Dividing dataset into training and testing sets
# 80/20 split
x_train, x_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.2)

# Create KNN model
model = KNeighborsClassifier(n_neighbors=9, weights="distance")

# Training
model.fit(x_train, y_train)

# Testing
accuracy = model.score(x_test, y_test)
print(accuracy)

# Save model
save_sklearn_model(model, "knn.sk")
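
# --- Optional follow-up sketch (not part of the original example) ---
# Assumption: save_sklearn_model serializes the estimator with pickle; if so, the
# saved classifier can be restored and its integer predictions mapped back to the
# original genre names via the fitted LabelEncoder.
import pickle

with open("knn.sk", "rb") as f:
    restored_model = pickle.load(f)

sample_pred = restored_model.predict(x_test[:5])
print(encoder.inverse_transform(sample_pred))  # decode integers back to genre labels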
Example #2
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
# load_analytic_data and save_sklearn_model are project helper functions
# (defined elsewhere in the repository).

dataset = load_analytic_data("dataset.csv")

# Encoding the labels
genres = dataset.iloc[:, -1]  # Last column
encoder = LabelEncoder()
labels = encoder.fit_transform(genres)

# Scaling the features
scaler = StandardScaler()  # MinMaxScaler() can also be used
features = scaler.fit_transform(np.array(dataset.iloc[:, :-1], dtype=float))

# Dividing dataset into training and testing sets
# 80/20 split
x_train, x_test, y_train, y_test = train_test_split(features,
                                                    labels,
                                                    test_size=0.2)

# Create SVM model
model = SVC()  # other SVM variants: NuSVC, SVR, NuSVR, LinearSVC, LinearSVR and OneClassSVM

# Training
model.fit(x_train, y_train)

# Testing
accuracy = model.score(x_test, y_test)
print(accuracy)

# Save model
save_sklearn_model(model, "svm.sk")
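
# --- Optional tuning sketch (not part of the original example) ---
# SVC accuracy depends strongly on C and the kernel; a small grid search over the
# training split is one common refinement. The grid below is illustrative only.
from sklearn.model_selection import GridSearchCV

param_grid = {"C": [0.1, 1, 10], "kernel": ["linear", "rbf"]}
search = GridSearchCV(SVC(), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_, search.best_score_)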
Example #3
import numpy as np
from sklearn.linear_model import LinearRegression

# x (two-column feature matrix) and y (targets) are assumed to have been loaded earlier.

# Initialize the engineered-feature array
x3 = np.empty(0)

# Append sin(x1) * x2 for every sample in x
for x1, x2 in x:
    temp = np.sin(x1) * x2
    x3 = np.append(x3, temp)

# Build the X matrix by inserting x3 as a third column
X = np.insert(x, 2, x3, axis=1)

# Fit the model
linear_regressor = LinearRegression()
linear_regressor.fit(X, y)

# Get the parameters
theta0 = linear_regressor.intercept_
theta1, theta2, theta3 = linear_regressor.coef_
print(
    "\nThe parameter values are: theta0 = {}, theta1 = {}, theta2 = {}, theta3 = {}."
    .format(theta0, theta1, theta2, theta3))

# Make the predictions of the model
y_pred = linear_regressor.predict(X)

# Evaluate the predictions
MSE = evaluate_predictions(y_pred, y)
print("Task 1 Linear Regression Model MSE: {}\n".format(MSE))

# Save the model
save_sklearn_model(linear_regressor, '../deliverable/Linear_Regression.pickle')
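
# --- Hypothetical helper sketch (assumption, not from the source) ---
# evaluate_predictions is a project utility whose code is not shown here; since its
# result is printed as an MSE, an equivalent definition would be:
from sklearn.metrics import mean_squared_error

def evaluate_predictions(y_pred, y_true):
    # Mean squared error between predictions and ground truth
    return mean_squared_error(y_true, y_pred)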
Example #4
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from utils import save_sklearn_model
from deliverable.run_model import load_data
from deliverable.run_model import transform

if __name__ == "__main__":
    X, y = load_data("../data/data.npz")

    X_train, _, y_train, _ = train_test_split(X, y, train_size=0.85, test_size=0.15, random_state=1)

    model = make_pipeline(FunctionTransformer(transform), LinearRegression())

    model.fit(X_train, y_train)
    save_sklearn_model(model, "../deliverable/t1.pickle")
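
# --- Hypothetical sketch (assumption) ---
# The transform function imported from deliverable.run_model is not shown here.
# Consistent with the engineered feature used in the other examples (sin(x1) * x2
# appended as an extra column), it could look like:
import numpy as np

def transform(X):
    # Append sin(x1) * x2 as a third feature column
    return np.column_stack((X, np.sin(X[:, 0]) * X[:, 1]))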
Example #5
import numpy as np
import utils
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split


if __name__ == '__main__':
    """ 
        Store arrays from data.npz in x and y.
        Split data into train and test set.
        Distinguish between features and labels.
        
        Note: since ordinary least squares is invariant, there is no need for standardization.
    """
    x, y =  utils.load_data("../data/data.npz")
    X = np.column_stack((x, np.sin(x[:, 0]) * x[:, 1]))

    train_set, test_set, train_labels, test_labels = train_test_split(X, y, test_size=0.2, random_state=42)

    """ 
        Linear model.
            From sklearn train the linear regression model on the train set.         
    """

    regr = LinearRegression()
    regr.fit(train_set, train_labels)

    """
        Save the model in deliverable.
    """
    utils.save_sklearn_model(regr, '../deliverable/linear_regression.pickle')
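
    """
        Optional evaluation sketch (not in the original snippet): report the mean
        squared error of the fitted model on the held-out 20% split created above.
    """
    from sklearn.metrics import mean_squared_error

    test_pred = regr.predict(test_set)
    print("Test MSE: {}".format(mean_squared_error(test_labels, test_pred)))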