def startFullDownload(self, location):
    status, downloadType, packageSize, readBytes = utils.readMarkerFile()
    if status == constants.READY:
        utils.updateMarkerFile(constants.DOWNLOAD)
        # fetchData is asynchronous; setMarkerToReady runs as its callback
        utils.fetchData(location, cb=self.setMarkerToReady)
        utils.updateMarkerFile(constants.DOWNLOAD_COMPLETE, None, None, constants.FULL)
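# --- Hypothetical sketch (assumption, not part of the original repo): one plausible
# --- shape for the marker-file helpers the download methods here rely on. The file
# --- path and field order (status, downloadType, packageSize, readBytes) are guesses
# --- based on how readMarkerFile/updateMarkerFile are called above and below.
MARKER_PATH = "/tmp/marker"  # assumed location

def readMarkerFile(path=MARKER_PATH):
    # Returns (status, downloadType, packageSize, readBytes) as strings.
    with open(path) as f:
        return f.read().strip().split(",")

def updateMarkerFile(status, downloadType=None, packageSize=None, readBytes=None,
                     path=MARKER_PATH):
    # Persists the download state so a reboot can resume from the last known byte.
    with open(path, "w") as f:
        f.write(",".join(str(v) for v in (status, downloadType, packageSize, readBytes)))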
def main():
    data = [(x, y) for x, y in utils.fetchData() if y in [0, 2]]
    binaryData = utils.transformToBinaryClasses(data, positiveClass=[0])
    divider = int(round(.85 * len(binaryData)))
    validation = binaryData[divider:]
    trainset = binaryData[:divider]
    inputDim = 784
    model = utils.SVM(inputDim, utils.eta, 1, 10)
    model.train(trainset, printLoss=True)

    # validate
    correct = 0
    incorrect = 0
    for x, y in validation:
        y_tag = model.inference(x)
        if y == y_tag:
            correct += 1
        else:
            incorrect += 1
    acc = 1. * correct / len(validation)
    print("correct: {} incorrect: {} total: {} \n accuracy: {}".format(
        correct, incorrect, len(validation), acc))
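# --- Hypothetical sketch (assumption, not the project's utils.SVM): a minimal
# --- hinge-loss SVM trained with SGD, matching the constructor and method names
# --- used above. Labels are assumed to be +1 / -1 after transformToBinaryClasses,
# --- and each x is assumed to be a NumPy feature vector.
import numpy as np

class SVM:
    def __init__(self, inputDim, eta, lambda_p, epochs):
        self.w = np.zeros(inputDim)
        self.b = 0.0
        self.eta = eta            # learning rate
        self.lambda_p = lambda_p  # L2 regularization strength
        self.epochs = epochs

    def train(self, data, printLoss=False):
        for epoch in range(self.epochs):
            loss = 0.0
            for x, y in data:
                margin = y * (np.dot(self.w, x) + self.b)
                if margin < 1:
                    # sub-gradient step on the regularized hinge loss
                    self.w = (1 - self.eta * self.lambda_p) * self.w + self.eta * y * x
                    self.b += self.eta * y
                    loss += 1 - margin
                else:
                    self.w = (1 - self.eta * self.lambda_p) * self.w
            if printLoss:
                print("epoch {}: hinge loss {:.4f}".format(epoch, loss))

    def inference(self, x):
        # Returns the predicted binary label.
        return 1 if np.dot(self.w, x) + self.b >= 0 else -1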
def startIncrementalDownload(self, location):
    '''
    Downloads the package in increments.

    This function can be called by the download process or by the daemon, so
    that a partial download can resume if the Pi reboots. If the network
    breaks in between, connectivity is re-tested every 90 seconds. The marker
    file is updated after every increment, so that after a reboot the download
    can restart from the last known byte. If the Pi fails right after an
    increment finishes but before the marker file is updated, that last chunk
    is simply downloaded again.

    Input: location of the package
    Output: None
    '''
    status, downloadType, packageSize, readBytes = utils.readMarkerFile()
    if status == constants.READY:
        utils.updateMarkerFile(constants.DOWNLOAD, constants.PARTIAL, self.packageSize, "0")
    try:
        readBytes = int(readBytes)
        packageSize = int(packageSize)
    except ValueError:
        print("Marker file is not consistent")
    while status == constants.DOWNLOAD:
        try:
            for pkt in range(readBytes // self.byteSize, self.packageSize // self.byteSize):
                end_bytes = readBytes + (pkt * self.byteSize)
                if end_bytes > self.packageSize:
                    # Possible when packageSize is not a multiple of byteSize
                    end_bytes = self.packageSize - (readBytes + self.byteSize * (pkt - 1))
                utils.fetchData(location, start_range=readBytes, end_range=end_bytes)
                utils.updateMarkerFile(constants.DOWNLOAD, self.packageSize, end_bytes, constants.PARTIAL)
            status = constants.READY
            self.setMarkerToDownloadCompleted()
        except exceptions.TimeOutException:
            # Wait until the server is reachable again before retrying.
            while not utils.isServerAccessible():
                time.sleep(90)
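# --- Hypothetical sketch (assumption): one way the ranged fetch used by
# --- startIncrementalDownload could be implemented with HTTP Range requests.
# --- The output path and error handling are placeholders, and the async/callback
# --- form used in startFullDownload is not covered here.
import requests

def fetchData(location, start_range=None, end_range=None, out_path="package.bin"):
    headers = {}
    if start_range is not None and end_range is not None:
        # Ask the server for only the bytes we still need.
        headers["Range"] = "bytes={}-{}".format(start_range, end_range)
    resp = requests.get(location, headers=headers, timeout=30)
    resp.raise_for_status()
    # Append partial content so successive increments build up the full package.
    mode = "ab" if headers else "wb"
    with open(out_path, mode) as f:
        f.write(resp.content)
    return len(resp.content)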
def main():
    k = utils.numOfClasses
    columns = int(((k * k) * (k - 1)) / 4)
    ecoc_matrix = np.zeros((k, columns), dtype=float)
    classifiers = []
    trainset = utils.fetchData()
    print("total train set data len: {}".format(str(len(trainset))))
    testset = utils.loadTestData()
    lambda_p = 1
    epoch_number = 15
    pair_index = 0

    # Train one-vs-pair classifiers
    for i in range(utils.numOfClasses):
        # all other classes that are not the positive class
        oppositeClasses = [c for c in range(utils.numOfClasses) if c != i]
        for y0, y1 in get_all_pairs(oppositeClasses):
            update_ecoc_matrix(ecoc_matrix, pair_index, i, (y0, y1))
            print("working on {} vs {},{}".format(i, y0, y1))
            pair_index = pair_index + 1
            filtered_data = filter_data(trainset, (i, y0, y1))
            print("relevant data: {}".format(str(len(filtered_data))))
            binary_data = utils.transformToBinaryClasses(filtered_data, positiveClass=i)
            model = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
            model.train(binary_data)
            classifiers.append(model)
            print("finished with #{} model".format(pair_index))

    # Evaluate test data by Hamming distance
    utils.evaluation(testset, utils.HammingDistance, ecoc_matrix,
                     'test.random2.ham.pred', classifiers, distanceMetric="Hamming")
    # Evaluate test data by loss-based decoding
    utils.evaluation(testset, utils.lossBaseDecoding, ecoc_matrix,
                     'test.random2.loss.pred', classifiers, distanceMetric="LBD")
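# --- Hypothetical sketches (assumptions): minimal versions of two helpers assumed
# --- by the ECOC mains in this file. get_all_pairs yields unordered class pairs and
# --- accepts either a class count or an explicit list of labels; filter_data keeps
# --- only the examples whose label belongs to the given classes.
from itertools import combinations

def get_all_pairs(classes):
    if isinstance(classes, int):
        classes = range(classes)
    return combinations(classes, 2)

def filter_data(dataset, classes):
    return [(x, y) for x, y in dataset if y in classes]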
def main():
    classifiers = []
    trainset = utils.fetchData()
    devset = utils.loadDevData()
    testset = utils.loadTestData()
    # One-vs-all code matrix: +1 on the diagonal, -1 everywhere else
    ecocMat = 2 * np.eye(utils.numOfClasses, dtype=int) - 1

    # train OvA classifiers
    for i in range(utils.numOfClasses):
        binData = utils.transformToBinaryClasses(trainset, positiveClass=[i])
        model = utils.SVM(utils.inputDim, utils.eta, 1, 50)
        model.train(binData)
        classifiers.append(model)
        print("finished with #{} model".format(i))

    # Validation - evaluate dev data by Hamming distance
    utils.validate(devset, utils.HammingDistance, ecocMat,
                   'test.onevall.ham.pred', classifiers, distanceMetric="Hamming")
    # Validation - evaluate dev data by loss-based decoding
    utils.validate(devset, utils.lossBaseDecoding, ecocMat,
                   'test.onevall.loss.pred', classifiers, distanceMetric="LBD")
    # Test - evaluate test data by Hamming distance
    utils.evaluate(testset, utils.HammingDistance, ecocMat,
                   'test.onevall.ham.pred', classifiers, distanceMetric="Hamming")
    # Test - evaluate test data by loss-based decoding
    utils.evaluate(testset, utils.lossBaseDecoding, ecocMat,
                   'test.onevall.loss.pred', classifiers, distanceMetric="LBD")
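# --- Hypothetical sketch (assumption): Hamming-distance decoding as it might look
# --- inside utils. Each binary classifier votes +1 or -1; the predicted class is the
# --- code-matrix row closest to the vote vector, with 0 entries treated as "don't care".
def HammingDistance(code_row, votes):
    # Counts disagreements between a code-matrix row and the classifier outputs.
    return sum(1 for c, v in zip(code_row, votes) if c != 0 and c != v)

def predict(x, ecoc_matrix, classifiers, distance=HammingDistance):
    votes = [clf.inference(x) for clf in classifiers]
    distances = [distance(row, votes) for row in ecoc_matrix]
    return min(range(len(distances)), key=lambda i: distances[i])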
def main():
    k = utils.numOfClasses
    columns = int(k * (k - 1) / 2)
    ecoc_matrix = np.zeros((k, columns), dtype=int)
    classifiers = []
    trainset = utils.fetchData()
    devset = utils.loadDevData()
    print("total train set data len: {}".format(str(len(trainset))))
    testset = utils.loadTestData()
    lambda_p = 1
    epoch_number = 20
    pair_index = 0

    # Train all-pairs classifiers
    for y0, y1 in get_all_pairs(utils.numOfClasses):
        update_ecoc_matrix(ecoc_matrix, pair_index, y0, y1)
        print("working on pair {},{}".format(y0, y1))
        pair_index = pair_index + 1
        filtered_data = filter_data(trainset, (y0, y1))
        print("pair relevant data: {}".format(str(len(filtered_data))))
        binary_data = utils.transformToBinaryClasses(filtered_data, positiveClass=[y0])
        model = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
        model.train(binary_data)
        classifiers.append(model)
        print("finished with #{} model".format(pair_index))
    print(ecoc_matrix)

    # Validation - evaluate dev data by Hamming distance
    utils.validate(devset, utils.HammingDistance, ecoc_matrix,
                   'test.allpairs.ham.pred', classifiers, distanceMetric="Hamming")
    # Validation - evaluate dev data by loss-based decoding
    utils.validate(devset, utils.lossBaseDecoding, ecoc_matrix,
                   'test.allpairs.loss.pred', classifiers, distanceMetric="LBD")
    # Test - evaluate test data by Hamming distance
    utils.evaluate(testset, utils.HammingDistance, ecoc_matrix,
                   'test.allpairs.ham.pred', classifiers, distanceMetric="Hamming")
    # Test - evaluate test data by loss-based decoding
    utils.evaluate(testset, utils.lossBaseDecoding, ecoc_matrix,
                   'test.allpairs.loss.pred', classifiers, distanceMetric="LBD")
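# --- Hypothetical sketch (assumption): the all-pairs variant of update_ecoc_matrix
# --- called with (ecoc_matrix, pair_index, y0, y1) above. Column pair_index separates
# --- y0 (positive) from y1 (negative); every other class stays 0 ("don't care").
def update_ecoc_matrix(ecoc_matrix, pair_index, y0, y1):
    ecoc_matrix[y0][pair_index] = 1
    ecoc_matrix[y1][pair_index] = -1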
def main():
    requiredStudentInfo = [
        "id_student", "highest_education", "studied_credits",
        "num_of_prev_attempts", "final_result", "disability"
    ]
    requiredStudentAssessment = ["id_student", "date_submitted", "score"]
    requiredStudentVLE = ["id_student", "sum_of_sum_click"]

    # reading csv files
    studentInfo = utils.fetchData("studentInfo.csv")
    studentAssessment = utils.fetchData("studentAssessment.csv")
    studentVLE = utils.fetchData("studentVle.csv")

    # creating a new column sum_of_sum_click
    studentVLE["sum_of_sum_click"] = studentVLE.groupby(
        ["id_student"])["sum_click"].transform(sum)

    studentInfo.set_index('id_student')
    studentAssessment.set_index('id_student')
    studentVLE.set_index('id_student')

    studentInfo = studentInfo[requiredStudentInfo]
    studentAssessment = studentAssessment[requiredStudentAssessment]
    studentVLE = studentVLE[requiredStudentVLE]
    studentVLE.drop_duplicates("id_student", inplace=True)

    # There are some "?" values in the "score" column of studentAssessment.csv;
    # replace them with 0 and convert the column to integers
    print("Cleaning \"Score\" Column in studentAssessment.csv")
    studentAssessment = utils.removeUnwantedData(studentAssessment, "score", "?", "0")
    studentAssessment["score"] = pd.to_numeric(studentAssessment["score"])

    # combining the three dataFrames
    print("Combining dataFrames...")
    combinedDF = studentInfo.combine_first(studentAssessment)
    combinedDF = combinedDF.combine_first(studentVLE)
    combinedDF.set_index('id_student')
    combinedDFcopy = combinedDF.copy()

    # converting string-based data to dummy columns
    print("Encoding string columns...")
    combinedDF = utils.encodingColumns(combinedDF)
    combinedDF["disability"] = pd.to_numeric(combinedDF["disability"])
    combinedDF["final_result"] = pd.to_numeric(combinedDF["final_result"])
    combinedDF["highest_education"] = pd.to_numeric(combinedDF["highest_education"])

    # resolving NaNs which are created when the dataFrames are combined
    print("Resolving NANs...")
    combinedDF = utils.resolveNANs(combinedDF)

    # Applying KMeans clustering to create a new column "procastinate" in the dataFrame
    print("Applying KMeans...")
    kmeans = KMeans(init='random', n_clusters=2, tol=1e-04, random_state=0).fit(
        combinedDF[[
            "highest_education", "studied_credits", "num_of_prev_attempts",
            "final_result", "disability", "date_submitted", "score",
            "sum_of_sum_click"
        ]])
    # labels = kmeans.fit_predict(combinedDF)
    labels = kmeans.labels_

    # changing 1's and 0's to True and False
    combinedDFcopy["procastinate"] = labels == 1
    # Adding a new column "procastinate"
    combinedDF["procastinate"] = labels

    # Randomizing (shuffle the rows)
    combinedDF = combinedDF.sample(frac=1)

    # Creating new dataFrames inTime and procastinate (for visualization)
    inTime, procastinate = [
        x for _, x in combinedDF.groupby(combinedDF['procastinate'] == 0)
    ]
    inTime = inTime.head(100)
    procastinate = procastinate.head(100)
    # print(procastinate)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(procastinate["date_submitted"], procastinate["score"],
               procastinate["final_result"], c="r", marker="o")
    ax.scatter(inTime["date_submitted"], inTime["score"],
               inTime["final_result"], c="g", marker="o")
    ax.set_xlabel("date_submitted")
    ax.set_ylabel("score")
    ax.set_zlabel("final_result")
    plt.title("Scatter Plot")
    plt.show()

    # Exporting the dataFrame to csv
    combinedDF.to_csv('../Dataset/studentFinal.csv', index=False, header=True)

    # Setting X and y
    y = combinedDF["procastinate"]
    X = combinedDF.drop("procastinate", axis=1)
    X.set_index('id_student', inplace=True)

    # Splitting data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50)
    # ANN
    print("Running ANN...")
    ann = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
    ann.fit(X_train, y_train.values.ravel())
    predictions = ann.predict(X_test)
    score = ann.score(X_test, y_test)
    loss_values = ann.loss_curve_
    print("Accuracy: ", score * 100)
    plt.title("ANN Loss")
    plt.ylabel("Loss Value")
    plt.plot(loss_values)
    plt.show()
    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])
    print("Classification Report\n", classification_report(y_test, predictions))

    # Logistic Regression
    print("Running Logistic Regression...")
    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)
    predictions = logisticRegr.predict(X_test)
    score = logisticRegr.score(X_test, y_test)
    print("Accuracy: ", score * 100)
    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])
    print("Classification Report\n", classification_report(y_test, predictions))

    # SVM
    print("Running SVM...")
    svmClassifier = SVC(kernel='linear')
    # train on a subset to keep the linear-kernel SVM tractable
    svmClassifier.fit(X_train.head(1000), y_train[:1000])
    predictions = svmClassifier.predict(X_test)
    score = svmClassifier.score(X_test, y_test)
    print("Accuracy: ", score * 100)
    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])
    print("Classification Report\n", classification_report(y_test, predictions))
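# --- Hypothetical sketch (assumption, not the project's utils module): a minimal
# --- plot_confusion_matrix matching the (matrix, class_names) call used above.
import matplotlib.pyplot as plt
import numpy as np

def plot_confusion_matrix(cm, class_names, title="Confusion Matrix"):
    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation="nearest", cmap=plt.cm.Blues)
    fig.colorbar(im)
    ax.set_xticks(np.arange(len(class_names)))
    ax.set_yticks(np.arange(len(class_names)))
    ax.set_xticklabels(class_names)
    ax.set_yticklabels(class_names)
    # annotate each cell with its count
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, int(cm[i, j]), ha="center", va="center")
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    ax.set_title(title)
    plt.show()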
def main():
    rows = utils.numOfClasses
    columns = random.randint(4, 8)
    ecoc_matrix = create_ecoc_matrix(rows, columns)
    classifiers = []
    trainset = utils.fetchData()
    print("total train set data len: {}".format(str(len(trainset))))
    devset = utils.loadDevData()
    testset = utils.loadTestData()
    lambda_p = 1
    epoch_number = 20
    print(ecoc_matrix)
    print(len(devset), len(testset))

    for j in range(columns):
        positive = []
        negative = []
        for i in range(rows):
            if ecoc_matrix[i][j] == 1:
                positive.append(i)
            elif ecoc_matrix[i][j] == -1:
                negative.append(i)
        print(j, " positive: ", positive, "negative:", negative)
        filtered_data = filter_data(trainset, negative + positive)
        print("filtered data", len(filtered_data))
        # transformToBinaryClasses needs to support a list of positive classes here
        binary_data = utils.transformToBinaryClasses(filtered_data, positiveClass=positive)
        model = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
        model.train(binary_data)
        classifiers.append(model)

    # Validation - evaluate dev data by Hamming distance
    utils.validate(devset, utils.HammingDistance, ecoc_matrix,
                   'test.random.ham.pred', classifiers, distanceMetric="Hamming")
    # Validation - evaluate dev data by loss-based decoding
    utils.validate(devset, utils.lossBaseDecoding, ecoc_matrix,
                   'test.random.loss.pred', classifiers, distanceMetric="LBD")
    # Test - evaluate test data by Hamming distance
    utils.evaluate(testset, utils.HammingDistance, ecoc_matrix,
                   'test.random.ham.pred', classifiers, distanceMetric="Hamming")
    # Test - evaluate test data by loss-based decoding
    utils.evaluate(testset, utils.lossBaseDecoding, ecoc_matrix,
                   'test.random.loss.pred', classifiers, distanceMetric="LBD")
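# --- Hypothetical sketch (assumption): one plausible create_ecoc_matrix for the
# --- random-code variant above. Entries are drawn from {-1, 0, 1}; matrices with a
# --- column missing a positive or negative class, or with duplicate rows, are
# --- re-drawn so every column yields a trainable binary problem.
import numpy as np

def create_ecoc_matrix(rows, columns):
    while True:
        matrix = np.random.choice([-1, 0, 1], size=(rows, columns))
        cols_ok = all((matrix[:, j] == 1).any() and (matrix[:, j] == -1).any()
                      for j in range(columns))
        rows_unique = len({tuple(r) for r in matrix.tolist()}) == rows
        if cols_ok and rows_unique:
            return matrix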