# Implement a KNN classifier in Python from scratch.
# Test it using cross-validation (train:test = 4:1) on the IRIS dataset,
# which can be obtained from scikit-learn's load_iris function, with K = 1, 2, 3.
# The program will be run with "python p5.py".
from IPython import embed
import numpy as np
from sklearn import datasets

from knn import Knn

# Load the iris data and split it into the feature matrix and target labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Hand the full dataset to the project's Knn class with the three K values
# required by the assignment, then run its classify() entry point.
knn = Knn(X, y, ks=[1, 2, 3])
knn.classify()
# Construct the classifier, read the test dataset, classify every test
# instance and write one result line per instance to `result_filename`.
#
# Ported from Python 2-only syntax (print statement, izip, dict.iteritems)
# to the Python 3 equivalents; the printed and written text is unchanged.
knn = Knn(k, train_set, train_key2ind)

print('Reading test dataset..')
test_set, test_key2ind, test_ind2key = dataset_reader.read_dataset(
    test_filename, test_filename + '.key', False, isolate_target_sentence)

print('Starting to classify test set:')
with open(result_filename, 'w') as o:
    for ind, key_set in enumerate(test_set):
        # test_ind2key maps the position of a key set back to its key.
        key = test_ind2key[ind]
        if debug:
            print('KEY:', key)
            print()
        # The built-in zip() is lazy on Python 3, replacing itertools.izip.
        for instance_id, vec, text in zip(key_set.instance_ids,
                                          key_set.context_m,
                                          key_set.contexts_str):
            if debug:
                print('QUERY:', text.strip())
            result = knn.classify(key, vec, ignore_closest, debug)
            if debug:
                print()
            # Example result line:
            # brother.n 00006 501566/0.5 501573/0.4 503751/0.1
            parts = [key + ' ' + instance_id]
            # result is iterated as (id, weight) pairs; weights are written
            # with four decimal places.
            parts.extend(' {}/{:.4f}'.format(sid, weight)
                         for sid, weight in result.items())
            result_line = ''.join(parts)
            o.write(result_line + '\n')
            if debug:
                print('LABELS FOUND: ', result_line)
                print()
# Instantiate the classifier, load the test dataset, then label each test
# instance in turn, appending one line per instance to the results file.
knn = Knn(k, train_set, train_key2ind)

print('Reading test dataset..')
test_set, test_key2ind, test_ind2key = dataset_reader.read_dataset(
    test_filename,
    test_filename + '.key',
    False,
    isolate_target_sentence,
)

print('Starting to classify test set:')
with open(result_filename, 'w') as results_out:
    for key_index, key_set in enumerate(test_set):
        # Recover the key for this key set from its position.
        key = test_ind2key[key_index]
        if debug:
            print('KEY:', key)
            print()

        # Walk the instance ids, context vectors and context strings in
        # lockstep.
        instances = zip(key_set.instance_ids,
                        key_set.context_m,
                        key_set.contexts_str)
        for instance_id, vec, text in instances:
            if debug:
                print('QUERY:', text.strip())
            result = knn.classify(key, vec, ignore_closest, debug)
            if debug:
                print()

            # Output line layout, e.g.:
            # brother.n 00006 501566/0.5 501573/0.4 503751/0.1
            pieces = [key + ' ' + instance_id]
            for sid, weight in result.items():
                pieces.append(' {}/{:.4f}'.format(sid, weight))
            result_line = ''.join(pieces)

            results_out.write(result_line + '\n')
            if debug:
                print('LABELS FOUND: ', result_line)
                print()