def _main():
    """Fit a simple linear regression of salary on years of experience.

    Loads the dataset, holds out one third of it as a test split, trains
    the model on the remainder, and renders the fit against both splits.
    """
    features, labels = data_preprocessing.import_dataset(
        'Salary_Data.csv', slice(0, -1), 1)
    split = data_preprocessing.split_train_test(features, labels, 1 / 3)
    features_train, features_test, labels_train, labels_test = split

    model = LinearRegression()
    model.fit(features_train, labels_train)

    visualize_performance_on_training(model, features_train, labels_train)
    visualize_performance_on_test(model, features_test, labels_test)
def _main():
    """Multiple linear regression on the 50-startups dataset.

    One-hot encodes the categorical state column, trains on an 80/20
    split, predicts the held-out labels, and runs backward elimination
    to identify the statistically significant feature columns.
    """
    X, y = data_preprocessing.import_dataset('50_Startups.csv', slice(0, 4), 4)
    # Column 3 holds the categorical state; encode it to dummy columns.
    X, _ = data_preprocessing.one_hot_encode_categorical_features(X, [3])
    X_train, X_test, y_train, y_test = data_preprocessing.split_train_test(
        X, y, test_size=0.2)

    model = LinearRegression()
    model.fit(X_train, y_train)

    # NOTE(review): these results are computed but not printed or
    # returned here — presumably inspected in a debugger/REPL.
    y_test_pred = model.predict(X_test)
    optimal_feature_indexes = backward_elimination(X, y)
def main():
    """Random-forest regression of salary on position level.

    Trains a 100-tree forest on the full (tiny) dataset and plots the
    step-shaped predictions over a dense grid of position levels.
    """
    features, labels = data_preprocessing.import_dataset(
        'Position_Salaries.csv', [1], [2])
    # The regressor expects a 1-D target array.
    labels = labels.flatten()
    regressor = RandomForestRegressor(n_estimators=100, random_state=0)
    regressor.fit(features, labels)
    plt.scatter(features, labels, color='red', label='Training examples')
    # Fix: use ndarray.min()/.max() for scalar bounds. The builtins
    # min()/max() iterate the ROWS of the 2-D feature array and return
    # size-1 arrays, whose use as np.arange bounds (and whose row-wise
    # comparison) is deprecated/ambiguous in NumPy.
    feature_grid = np.arange(features.min(), features.max(), step=0.01)
    feature_grid = feature_grid.reshape((len(feature_grid), 1))
    plt.plot(feature_grid, regressor.predict(feature_grid), color='blue',
             label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
def main():
    """Degree-4 polynomial regression of salary on position level.

    Expands the single feature into polynomial terms, fits a linear
    model on them, and plots predictions against the training points.
    """
    X, y = data_preprocessing.import_dataset(
        'Position_Salaries.csv', [1], [2])
    X_poly = PolynomialFeatures(degree=4).fit_transform(X)

    model = LinearRegression()
    model.fit(X_poly, y)

    plt.scatter(X, y, color='red', label='Training examples')
    plt.plot(X, model.predict(X_poly), color='blue', label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
def main():
    """Support-vector regression (RBF kernel) of salary on position level.

    SVR is sensitive to feature magnitudes, so both the feature and the
    label columns are standardized before fitting. The plot is therefore
    in standardized units on both axes.
    """
    X, y = data_preprocessing.import_dataset(
        'Position_Salaries.csv', [1], [2])

    X_scaler = StandardScaler()
    X = X_scaler.fit_transform(X)
    y_scaler = StandardScaler()
    y = y_scaler.fit_transform(y)
    # SVR expects a 1-D target array.
    y = y.flatten()

    model = SVR(kernel='rbf', gamma='scale')
    model.fit(X, y)

    plt.scatter(X, y, color='red', label='Training examples')
    plt.plot(X, model.predict(X), color='blue', label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
def main():
    """Decision-tree regression of salary on position level.

    Trains on the full (tiny) dataset and plots predictions over a
    dense grid of position levels.
    """
    features, labels = data_preprocessing.import_dataset(
        'Position_Salaries.csv', [1], [2])
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(features, labels)
    plt.scatter(features, labels, color='red', label='Training examples')
    # Plot with high resolution because with only one feature, the simple
    # plot will show that all training examples are matched by the
    # prediction, which is because of the way decision tree regression
    # predicts a value from an average of a region.
    # Fix: use ndarray.min()/.max() for scalar bounds. The builtins
    # min()/max() iterate the ROWS of the 2-D feature array and return
    # size-1 arrays, whose use as np.arange bounds (and whose row-wise
    # comparison) is deprecated/ambiguous in NumPy.
    feature_grid = np.arange(features.min(), features.max(), step=0.01)
    feature_grid = feature_grid.reshape((len(feature_grid), 1))
    plt.plot(feature_grid, regressor.predict(feature_grid), color='blue',
             label='Predictions')
    plt.legend()
    plt.xlabel('Position level')
    plt.ylabel('Salary')
    plt.show()
from data_preprocessing import data_preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from plots.classification_result_visualizer import visualize_two_feature_classification

# Classify social-network-ad purchases from age and estimated salary
# with logistic regression, then visualize the decision regions over
# the training split.
features, labels = data_preprocessing.import_dataset(
    'datasets/Social_Network_Ads.csv', [2, 3], 4)

# NOTE(review): the scaler is fit on the FULL dataset before the split,
# which leaks test-set statistics into training. For strict evaluation,
# fit the scaler on the training split only — left as-is here to keep
# the change minimal.
feature_scaler = StandardScaler()
features = feature_scaler.fit_transform(features)

features_train, features_test, labels_train, labels_test = \
    data_preprocessing.split_train_test(features, labels, test_size=0.25)

classifier = LogisticRegression(random_state=0, solver='liblinear')
# Bug fix: train on the training split only. The original called
# classifier.fit(features, labels) on the whole dataset, which made the
# train/test split pointless and invalidated any test-set evaluation.
classifier.fit(features_train, labels_train)

visualize_two_feature_classification(features_train, labels_train, classifier,
                                     xlabel='Age', ylabel='Estimated salary')
from data_preprocessing import data_preprocessing from sklearn.cluster import KMeans from matplotlib import pyplot as plt features = data_preprocessing.import_dataset('datasets/Mall_Customers.csv', [3, 4]) # choose K using elbow method max_feature_count = 10 wcss = [] for i in range(1, max_feature_count + 1): kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, max_iter=300) kmeans.fit_predict(features) wcss.append(kmeans.inertia_) plt.plot(range(1, max_feature_count + 1), wcss) plt.xlabel('Number of clusters') plt.ylabel('WCSS') plt.show() # look at the plot and choose the value using the elbow method K = 5 kmeans = KMeans(n_clusters=K, init='k-means++', n_init=10, max_iter=300) cluster_pred = kmeans.fit_predict(features) # visualize the clusters. only applicable when the number of features is 2 or 3. colors = ['red', 'blue', 'green', 'cyan', 'magenta'] for i in range(K): cluster_item_indexes = cluster_pred == i plt.scatter(features[cluster_item_indexes, 0], features[cluster_item_indexes, 1], c=colors[i],