def main(station, k_neighbors, stations, transfers, beverage_data):
    """Print which beverage the classifier predicts for ``station``.

    A ``KNeighborsClassifier`` is built from ``stations``, ``transfers`` and
    ``beverage_data``, fitted with ``station`` and ``k_neighbors``, and its
    prediction is read through the ``'tea'`` and ``'coffee'`` keys.  One of
    three (Russian-language) messages is printed depending on which value is
    larger.

    Returns:
        Always ``0``.
    """
    model = KNeighborsClassifier(stations, transfers, beverage_data)
    model.fit(station, k_neighbors)
    scores = model.predict()

    tea, coffee = scores['tea'], scores['coffee']
    if tea > coffee:
        verdict = f'На станции {station} пьют чай'
    elif coffee > tea:
        verdict = f'На станции {station} пьют кофе'
    else:
        # Neither value is strictly larger: report both beverages.
        verdict = f'На станции {station} пьют и чай и кофе'

    print(verdict)
    return 0
# Evaluate the project's KNeighborsClassifier on the iris dataset using a
# fixed 100-row training split.
from sklearn import datasets, utils

from knn import KNeighborsClassifier

# Load iris and shuffle features and targets together with a fixed seed.
iris = datasets.load_iris()
ynames = iris.target_names
X, y = utils.shuffle(iris.data, iris.target, random_state=1)

# The first `train_set_size` rows train the model; the remaining rows test it.
train_set_size = 100
X_train, X_test = X[:train_set_size], X[train_set_size:]
y_train, y_test = y[:train_set_size], y[train_set_size:]

k = 5
knn = KNeighborsClassifier(k=k)
knn.fit(X_train, y_train)
y_pred_test = knn.predict(X_test)

# score() is called with the predictions first and the true labels second.
accuracy = knn.score(y_pred_test, y_test)
print("Accuracy of KNN test set:", accuracy)
# Evaluate the project's KNeighborsClassifier on the iris dataset using a
# fixed 100-row training split.
from sklearn import datasets, utils

from knn import KNeighborsClassifier

# Load iris and shuffle features and targets together with a fixed seed.
iris = datasets.load_iris()
ynames = iris.target_names
X, y = utils.shuffle(iris.data, iris.target, random_state=1)

# The first `train_set_size` rows train the model; the remaining rows test it.
train_set_size = 100
X_train, X_test = X[:train_set_size], X[train_set_size:]
y_train, y_test = y[:train_set_size], y[train_set_size:]

k = 5
knn = KNeighborsClassifier(k=k)

# There is no separate fit step here: predict() is handed the training
# features, the test features and the training labels in a single call.
y_pred_test = knn.predict(X_train, X_test, y_train)

# score() is called with the predictions first and the true labels second.
accuracy = knn.score(y_pred_test, y_test)
print("Accuracy of KNN test set:", accuracy)
# Fetch vehicle data, normalise its features, and measure the accuracy of a
# KNeighborsClassifier on a 50/50 train/test split.
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import fetcher
import normalizer
import processor
from knn import KNeighborsClassifier

print('Fetching data...')
raw_data = fetcher.fetchVehicles()

print('Processing data...')
processed_data = processor.process(raw_data)
# Only the "x" entry is normalised; the "y" entry is used as-is.
normalized_data_x = normalizer.normalize(processed_data["x"])

# Half of the rows are held out for testing (test_size=0.5).
train_x, test_x, train_y, test_y = train_test_split(
    normalized_data_x,
    processed_data["y"],
    test_size=0.5,
)

classifier = KNeighborsClassifier(1)
classifier.fit(train_x, train_y)
predictions = classifier.predict(test_x)

print(f'Done! Accuracy: {accuracy_score(test_y, predictions)}')
# Build the mesh: one [x, y] point for every grid position.
# The y steps do not depend on the current x, so compute them once instead of
# rebuilding the same array on every outer iteration.
y_steps = np.arange(min_y, max_y, resolution)
for step_x in np.arange(min_x, max_x, resolution):
    for step_y in y_steps:
        mesh_in.append([step_x, step_y])

# Convert to numpy for performance reasons
mesh_in = np.array(mesh_in)

# Number of mesh points, kept as a string because it is only used in the
# progress messages below.
l = str(len(mesh_in))
print(
    'Predicting.. | total points to predict = %s | This might take some time..'
    % l)
mesh_out = np.array(knn.predict(mesh_in))

print('Computing mesh colors..')


def c(x):
    """Map a predicted label to a colour name (1, 2, anything else)."""
    if x == 1:
        return 'turquoise'
    if x == 2:
        return 'gold'
    return 'dodgerblue'


# Prepare the mesh colors: look each colour name up in `colors`.
mesh_colors = [colors[c(p)] for p in mesh_out]
features_colors = [colors[c(p)] for p in labels]

print("Drawing graph | total points = " + l)

# Draw the decision boundaries
fig = plt.figure()
plt.title('Decision boundary for ' + df)
plt.ylabel('Feature 1')
outputData.append(data[20]) return inputData, outputData if __name__ == '__main__': inputToLearn, outputToLearn = readData("file.txt") inputTest = [] outputTest = [] for i in range(10): index = randint(0, len(inputToLearn) - 1) inputTest.append(inputToLearn[index]) outputTest.append(outputToLearn[index]) inputToLearn.remove(inputToLearn[index]) outputToLearn.remove(outputToLearn[index]) classifier = KNeighborsClassifier() classifier.fit(inputToLearn, outputToLearn) accuracy = 0 for i in range(len(inputTest)): if classifier.predict(inputTest[i]) == outputTest[i]: accuracy += 1 print("The accuracy is: ", float(accuracy) / len(inputTest))
# Collect one label per journey pattern, in the dict's own iteration order.
# Iterating the dict directly visits keys in the same order as .keys() but,
# unlike indexing `.keys()[i]`, also works on Python 3 (where keys() returns
# a non-subscriptable view) and avoids rebuilding the key list on every step.
labels = [journeyPatternIds[key] for key in journeyPatternIds]

# Build one trajectory per test trip. Source tuples unpack as (t, lon, lat);
# the timestamp is dropped and each point is stored as [lat, lon].
test = []
for trip_key in testSet:
    test.append([[lat, lon] for _t, lon, lat in testSet[trip_key]])

# Create the csv with the results of the KNN on the requested file
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(samples, labels)
result = neigh.predict(test, haversine)

dic = {
    "Test_Trip_ID": list(range(len(test))),
    "Predicted_JourneyPatternID": result,
}
out_df = pd.DataFrame(
    dic, columns=['Test_Trip_ID', 'Predicted_JourneyPatternID'])
out_df.to_csv("testSet_JourneyPatternIDs.csv", sep='\t', index=False)

# Do a 10 fold for our KNN and save results in file
kf = KFold(n_splits=10)
# NOTE(review): `f` is not closed anywhere in this excerpt — confirm that the
# code following it closes the file.
f = open('10fold_2p_out.txt', 'w')
sum_acc = 0
count = 1
for train_index, test_index in kf.split(samples):
    print("iteration ", count)