def __init__(self):
    """Load the training samples, build the KNN model and the per-lab priors.

    After construction:
      * ``self.KNN`` is a ``KNearestNeighbors`` built from every
        ``(sample, label)`` pair returned by
        ``cloudserver.db.getAllLocationSamples()``.
      * ``self.prior_votes`` maps a lab id to a list of ``(room id, votes)``
        tuples. Lab ``0`` always maps to an empty list; every lab with an
        id greater than zero gets 2 votes for each of its rooms.
    """
    self.addTrainingSamples()

    # Every stored sample becomes one (feature vector, label) pair.
    stored = cloudserver.db.getAllLocationSamples()
    self.KNN = KNearestNeighbors(
        [(entry["sample"], entry["label"]) for entry in stored])

    # Group the room ids by the lab each room is defined under.
    rooms_by_lab = {}
    for definition in cloudserver.db.ROOM_DEFINITION:
        rooms_by_lab.setdefault(definition["lab"], []).append(definition["id"])

    # Lab prior: a fixed number of votes for every room of the lab.
    votes_per_room = 2
    self.prior_votes = {0: []}
    for lab, room_ids in rooms_by_lab.items():
        if lab > 0:
            self.prior_votes[lab] = [
                (room_id, votes_per_room) for room_id in room_ids]

    print("prior votes:")
    print(self.prior_votes)
import numpy as np
import matplotlib.pyplot as plt

from sample import sample, getMatrix, confusion_matrix, plot_confusion_matrix
from KNN import zscale, KNearestNeighbors, trainAndGetBestK

# ---------------------------------------------------------------------------
# "3 percent separable" data set: fit a 1-nearest-neighbour classifier and
# plot the confusion matrix of its predictions on the held-out split.
# ---------------------------------------------------------------------------

# Load the CSV and split it into train/test features and labels.
x_train, x_test, y_train, y_test = sample(getMatrix('data/Seperable.csv'))

# The return value of zscale is not used here, so it presumably scales its
# argument in place -- TODO confirm against KNN.zscale.
for features in (x_train, x_test):
    zscale(features)

# k = 1 was picked with trainAndGetBestK(x_train, x_test, y_train, y_test),
# which reported a score of 62.6% for it; re-run that call to search again.
classifier = KNearestNeighbors(1)
classifier.fit(x_train, y_train)
predictions = classifier.predict(x_test)

plot_confusion_matrix(confusion_matrix(predictions, y_test),
                      np.unique(y_train))
plt.title('seperable, k = 1, score = 62.6%')
plt.show()
class LocationPredictor:
    """Room predictor: a KNN model over stored samples plus per-lab prior votes.

    Construction reloads the training samples into ``cloudserver.db`` from
    the files listed in ``trainingFiles/trainingFilesList.txt`` and builds a
    ``KNearestNeighbors`` model from everything the database returns.
    """

    # Rows / labels read by the most recent addSamples() call. addSamples()
    # rebinds these on the instance, so the class-level lists stay empty.
    trainingData = []
    trainingLabels = []

    def __init__(self):
        """Load the samples, build ``self.KNN`` and ``self.prior_votes``.

        ``self.prior_votes`` maps a lab id to a list of ``(room id, votes)``
        tuples: lab ``0`` maps to an empty list and every lab with an id
        greater than zero gets 2 votes for each of its rooms.
        """
        self.addTrainingSamples()

        # Read sample data back from the DB as (feature vector, label) pairs.
        samples = cloudserver.db.getAllLocationSamples()
        pairs = [(s["sample"], s["label"]) for s in samples]
        self.KNN = KNearestNeighbors(pairs)

        # Group room ids by lab (room_id rather than `id`, which shadows
        # the builtin).
        rooms_by_lab = {}
        for room in cloudserver.db.ROOM_DEFINITION:
            room_id = room["id"]
            rooms_by_lab.setdefault(room["lab"], []).append(room_id)

        # Lab prior, as a list of votes (roomID, #votes).
        prior_vote_const = 2
        self.prior_votes = {0: []}
        for lab, room_ids in rooms_by_lab.items():
            if lab > 0:
                self.prior_votes[lab] = [
                    (room_id, prior_vote_const) for room_id in room_ids]
        print("prior votes:")
        print(self.prior_votes)

    def addSamples(self, sampleFile):
        """Load one sample file pair and push every sample into the database.

        Args:
            sampleFile: path prefix. ``<sampleFile>.txt`` holds one sample
                per line as tab-separated integers and
                ``<sampleFile>Labels.txt`` holds one label per line.

        Raises:
            IOError/OSError: if either file cannot be opened.
            ValueError: if a sample field is not an integer.
            AssertionError: if no samples were read or the number of
                samples and labels differ.
        """
        infile = sampleFile + ".txt"
        print("Loading from " + infile + "...")
        # `with` closes the handle; the original leaked it. A list
        # comprehension (not map) keeps each row a real list on Python 3.
        with open(infile, 'r') as f:
            self.trainingData = [
                [int(field) for field in line.rstrip('\n').split('\t')]
                for line in f
            ]

        infile = sampleFile + "Labels.txt"
        print("Loading from " + infile + "...")
        with open(infile, 'r') as f:
            self.trainingLabels = [line.rstrip('\n') for line in f]

        assert len(self.trainingData) > 0, "no samples in " + sampleFile
        assert len(self.trainingData) == len(self.trainingLabels), \
            "sample/label count mismatch in " + sampleFile

        for label, sample in zip(self.trainingLabels, self.trainingData):
            cloudserver.db.addLocationSample(label, sample)

    def addTrainingSamples(self):
        """Reset the stored samples and reload them from the training files.

        Reads ``trainingFiles/trainingFilesList.txt`` (one path prefix per
        line) and calls ``addSamples`` for each entry.
        """
        # NOTE(review): the store is destroyed right before it is checked.
        # If DestroyLocationSamples() empties it, the "samples found" early
        # return below can never trigger -- confirm this is intended.
        cloudserver.db.DestroyLocationSamples()
        samples = cloudserver.db.getAllLocationSamples()
        if len(samples) > 0:
            print(str(len(samples)) + " samples found")
            return
        print("no samples found")

        with open("trainingFiles/trainingFilesList.txt", 'r') as f:
            prefixes = [line.rstrip('\n') for line in f]

        for prefix in prefixes:
            if not prefix:
                # A blank line would otherwise try to open ".txt".
                continue
            self.addSamples(prefix)
            print("added samples from " + prefix)
        # Function-call form: valid on both Python 2 and Python 3.
        print("successful reupload")

    def personal_classifier(self, ID, sample):
        """Classify ``sample``, biased by the prior votes of the user's lab.

        Args:
            ID: user id passed to ``cloudserver.db.userIDLookup``.
            sample: feature vector passed to ``self.KNN.get_nearest_pairs``.

        Returns:
            The first element of the pair chosen by ``KNN.majority_vote``
            over the lab prior votes plus the nearest-neighbour votes
            (presumably the room id -- confirm against KNearestNeighbors).
        """
        prior = []
        screenName = cloudserver.db.userIDLookup(ID)
        if screenName is not None:
            usernameAttributes = cloudserver.db.getAttributes(screenName, False)
            labInt = usernameAttributes["lab"]
            # __init__ only creates entries for lab 0 and labs > 0 that
            # appear in ROOM_DEFINITION; any other lab means "no prior"
            # instead of a KeyError.
            prior = self.prior_votes.get(labInt, [])
        nearest_votes = self.KNN.get_nearest_pairs(sample)
        result_pair = self.KNN.majority_vote(prior + nearest_votes)
        return result_pair[0]
def POST(self):
    """Handle a comma-separated location command posted in the request body.

    The first field selects the action:

    * ``REUP`` -- reload ``self.trainingData`` / ``self.trainingLabels``
      from ``backup.txt`` + ``backuplabels.txt`` followed by
      ``backup2.txt`` + ``backuplabels2.txt``. Returns ``None``.
    * ``DES`` -- clear both lists. Returns ``"successful destroy"``.
    * ``GET`` -- treat the remaining fields as an integer sample and
      classify it with a ``KNearestNeighbors`` built from the in-memory
      training data. Returns ``"<a>:<b>,LOL"`` where ``a`` and ``b`` are
      the two elements of the ``majority_vote`` result.
    * anything else -- the first field is the label and the remaining
      fields are an integer sample; it is appended to the in-memory lists
      and stored through ``cloudserver.db.addLocationSample``. Returns the
      database sample count followed by ``" LOL"``.

    Raises:
        IOError/OSError: ``REUP`` when a backup file cannot be opened.
        ValueError: when a sample field is not an integer.
    """
    raw_data = web.data()
    locs = raw_data.split(',')
    command = locs[0]

    if command == "REUP":
        # Read into locals and publish at the end, so a missing or corrupt
        # backup file cannot leave data and labels half-loaded.
        data = []
        labels = []
        backups = (("backup.txt", "backuplabels.txt"),
                   ("backup2.txt", "backuplabels2.txt"))
        for data_path, label_path in backups:
            # `with` closes each handle; the original leaked all four.
            with open(data_path, 'r') as f:
                for line in f:
                    data.append(
                        [int(v) for v in line.rstrip('\n').split('\t')])
            with open(label_path, 'r') as f:
                for line in f:
                    labels.append(line.rstrip('\n'))
        self.trainingData = data
        self.trainingLabels = labels
        # Function-call form: valid on both Python 2 and Python 3.
        print("successful reupload")
        return

    if command == "DES":
        self.trainingData = []
        self.trainingLabels = []
        return "successful destroy"

    fields = locs[1:]

    if command == "GET":
        # List comprehension rather than map(): stays a list on Python 3.
        query = [int(v) for v in fields]
        print('Location Predict request:', query)
        KNN = KNearestNeighbors(
            list(zip(self.trainingData, self.trainingLabels)))
        pairs = KNN.get_nearest_pairs(query)
        print('Predicted pairs:', pairs)
        location = KNN.majority_vote(pairs)
        print(location)
        return str(location[0]) + ':' + str(location[1]) + ",LOL"

    # Default action: the first field is the label of a new training sample.
    ID = command
    sample = [int(v) for v in fields]
    self.trainingData.append(sample)
    self.trainingLabels.append(ID)
    print('Submitted ID=', ID)
    print('Training sample=', sample)
    cloudserver.db.addLocationSample(ID, sample)
    return str(cloudserver.db.countLocationSamples()) + " LOL"