Example #1
    def startFullDownload(self, location):
        ''' Download the full package in a single request.

        Input: Location of the package
        Output: None
        '''
        status, downloadType, packageSize, readBytes = utils.readMarkerFile()
        if status == constants.READY:
            utils.updateMarkerFile(constants.DOWNLOAD)
        # fetchData is asynchronous; setMarkerToReady is passed as its
        # completion callback
        utils.fetchData(location, cb=self.setMarkerToReady)
        utils.updateMarkerFile(constants.DOWNLOAD_COMPLETE, None, None,
                               constants.FULL)
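
The marker-file helpers referenced above are not shown in these examples. A minimal sketch of what utils.readMarkerFile and utils.updateMarkerFile could look like, assuming the marker is a single comma-separated line (the file path and format are assumptions; the argument order follows the calls above):

MARKER_PATH = "/tmp/download.marker"  # hypothetical location

def readMarkerFile():
    # Returns (status, downloadType, packageSize, readBytes)
    with open(MARKER_PATH) as f:
        status, package_size, read_bytes, download_type = \
            f.read().strip().split(",")
    return status, download_type, package_size, read_bytes

def updateMarkerFile(status, packageSize=None, readBytes=None,
                     downloadType=None):
    # Overwrite the marker with the latest download state
    with open(MARKER_PATH, "w") as f:
        f.write(",".join(str(v) for v in
                         (status, packageSize, readBytes, downloadType)))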
Example #2
def main():
    # keep only classes 0 and 2 and map them to a binary problem,
    # with class 0 as the positive class
    data = [(x, y) for x, y in utils.fetchData() if y in [0, 2]]
    binaryData = utils.transformToBinaryClasses(data, positiveClass=[0])

    # 85/15 train/validation split
    divider = int(round(.85 * len(binaryData)))
    validation = binaryData[divider:]
    trainset = binaryData[:divider]
    inputDim = 784

    # SVM(input dimension, learning rate, regularisation lambda, epochs)
    model = utils.SVM(inputDim, utils.eta, 1, 10)
    model.train(trainset, printLoss=True)

    # validate
    correct = 0
    incorrect = 0
    for x, y in validation:
        y_tag = model.inference(x)
        if y == y_tag:
            correct += 1
        else:
            incorrect += 1

    acc = 1. * correct / len(validation)
    print("correct: {} incorrect: {} total: {} \n accuracy: {}".format(\
        correct, incorrect, len(validation), acc))
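
utils.transformToBinaryClasses is used throughout these examples but never shown. A minimal sketch of what it could look like, assuming it relabels examples as +1 for the positive class(es) and -1 for everything else (the real helper may differ, e.g. in how it handles scalar vs. list arguments):

def transformToBinaryClasses(data, positiveClass):
    # accept either a single label or a collection of labels
    if not isinstance(positiveClass, (list, tuple, set)):
        positiveClass = [positiveClass]
    positive = set(positiveClass)
    return [(x, 1 if y in positive else -1) for x, y in data]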
Example #3
    def startIncrementalDownload(self, location):
        ''' Download the file in increments.

        This function can be called by the Download process or by the Daemon,
        so that a partial download can resume if the Pi reboots.
        If the network breaks in between, connectivity is re-tested every
        90 seconds.
        The marker file is updated after every increment, so that if the Pi
        reboots the download can restart from the last known byte.
        If the Pi fails just after an increment is downloaded but before the
        marker file is updated, that increment is downloaded again.
        Input: Location of the package
        Output: None
        '''
        status, downloadType, packageSize, readBytes = utils.readMarkerFile()
        if status == constants.READY:
            # fresh download: record it in the marker and start from byte 0
            utils.updateMarkerFile(constants.DOWNLOAD, self.packageSize, "0",
                                   constants.PARTIAL)
            status = constants.DOWNLOAD
            packageSize = self.packageSize
            readBytes = 0
        try:
            readBytes = int(readBytes)
            packageSize = int(packageSize)
        except ValueError:
            print("Marker File is not consistent")
            return
        while status == constants.DOWNLOAD:
            try:
                # number of byteSize chunks, rounding up for a short tail chunk
                numChunks = ((self.packageSize + self.byteSize - 1)
                             // self.byteSize)
                for pkt in range(readBytes // self.byteSize, numChunks):
                    start_bytes = pkt * self.byteSize
                    # the last chunk may be shorter when packageSize is not a
                    # multiple of byteSize
                    end_bytes = min(start_bytes + self.byteSize,
                                    self.packageSize)
                    utils.fetchData(location,
                                    start_range=start_bytes,
                                    end_range=end_bytes)
                    utils.updateMarkerFile(constants.DOWNLOAD,
                                           self.packageSize, end_bytes,
                                           constants.PARTIAL)
                status = constants.READY
                self.setMarkerToDownloadCompleted()
            except exceptions.TimeOutException:
                # re-test connectivity every 90 seconds, then resume from the
                # last byte recorded in the marker file
                while not utils.isServerAccessible():
                    time.sleep(90)
                readBytes = int(utils.readMarkerFile()[3])
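
utils.fetchData is also not shown. A minimal sketch of the ranged fetch it might perform, assuming an HTTP server that honours Range headers and a requests-based implementation (the target file, timeout, and exclusive end_range are all assumptions):

import requests

def fetchData(location, start_range=None, end_range=None, cb=None):
    headers = {}
    if start_range is not None and end_range is not None:
        # HTTP Range headers are inclusive, hence end_range - 1
        headers["Range"] = "bytes={}-{}".format(start_range, end_range - 1)
    resp = requests.get(location, headers=headers, timeout=30)
    resp.raise_for_status()
    with open("package.bin", "ab") as f:  # hypothetical local target file
        f.write(resp.content)
    if cb is not None:
        cb()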
Example #4
def main():
    k = utils.numOfClasses
    # one column per (positive class, opposite pair) combination: k * C(k-1, 2)
    columns = int(k * (k - 1) * (k - 2) / 2)
    ecoc_matrix = np.zeros((k, columns), dtype=float)
    classifiers = []
    trainset = utils.fetchData()
    print("total train set data len: {}".format(str(len(trainset))))
    testset = utils.loadTestData()

    lambda_p = 1
    epoch_number = 15
    pair_index = 0

    # Train All-Pair Classifiers
    for i in range(utils.numOfClasses):
        # add all other classes that are not the positive class
        oppositeClasses = [c for c in range(utils.numOfClasses) if c != i]
        for y0, y1 in get_all_pairs(oppositeClasses):
            update_ecoc_matrix(ecoc_matrix, pair_index, i, (y0, y1))
            print("working on {} vs {},{}".format(i, y0, y1))
            pair_index = pair_index + 1
            filtered_data = filter_data(trainset, (i, y0, y1))
            print("relevant data: {}".format(str(len(filtered_data))))
            binary_data = utils.transformToBinaryClasses(filtered_data,
                                                         positiveClass=i)
            model = utils.SVM(utils.inputDim, utils.eta, lambda_p,
                              epoch_number)
            model.train(binary_data)
            classifiers.append(model)
            print("finished with #{} model".format(pair_index))

    # Evaluate Test Data by Hamming Distance
    utils.evaluation(testset,
                     utils.HammingDistance,
                     ecoc_matrix,
                     'test.random2.ham.pred',
                     classifiers,
                     distanceMetric="Hamming")

    # Evaluate Test Data by Loss Base Decoding
    utils.evaluation(testset,
                     utils.lossBaseDecoding,
                     ecoc_matrix,
                     'test.random2.loss.pred',
                     classifiers,
                     distanceMetric="LBD")
Example #5
def main():
    classifiers = []
    trainset = utils.fetchData()
    devset = utils.loadDevData()
    testset = utils.loadTestData()

    # one-vs-all code matrix: +1 for the class itself, -1 for all other classes
    ecocMat = -np.ones((utils.numOfClasses, utils.numOfClasses), dtype=int)
    np.fill_diagonal(ecocMat, 1)

    # train OvA classifiers
    for i in range(utils.numOfClasses):
        binData = utils.transformToBinaryClasses(trainset, positiveClass=[i])
        model = utils.SVM(utils.inputDim, utils.eta, 1, 50)
        model.train(binData)
        classifiers.append(model)
        print("finished with #{} model".format(i))

    # Validation - Evaluate Dev Data by Hamming Distance
    utils.validate(devset,
                   utils.HammingDistance,
                   ecocMat,
                   'test.onevall.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Validation - Evaluate Dev Data by Loss Base Decoding
    utils.validate(devset,
                   utils.lossBaseDecoding,
                   ecocMat,
                   'test.onevall.loss.pred',
                   classifiers,
                   distanceMetric="LBD")

    # Test - Evaluate test data by Hamming Distance
    utils.evaluate(testset,
                   utils.HammingDistance,
                   ecocMat,
                   'test.onevall.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Test - Evaluate test data by Loss Base Decoding
    utils.evaluate(testset,
                   utils.lossBaseDecoding,
                   ecocMat,
                   'test.onevall.loss.pred',
                   classifiers,
                   distanceMetric="LBD")
Example #6
def main():
    k = utils.numOfClasses
    columns = int(k * (k - 1) / 2)
    ecoc_matrix = np.zeros((k, columns), dtype=int)
    classifiers = []
    trainset = utils.fetchData()
    devset = utils.loadDevData()
    print("total train set data len: {}".format(str(len(trainset))))
    testset = utils.loadTestData()

    lambda_p = 1
    epoch_number = 20
    pair_index = 0

    # Train All-Pair Classifiers
    for y0, y1 in get_all_pairs(utils.numOfClasses):
        update_ecoc_matrix(ecoc_matrix, pair_index, y0, y1)
        print("working on pair {},{}".format(y0, y1))
        pair_index = pair_index + 1
        filtered_data = filter_data(trainset, (y0, y1))
        print("pair relevant data: {}".format(str(len(filtered_data))))
        binary_data = utils.transformToBinaryClasses(filtered_data,
                                                     positiveClass=[y0])
        model = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
        model.train(binary_data)
        classifiers.append(model)
        print("finished with #{} model".format(pair_index))

    print(ecoc_matrix)

    # Validation - Evaluate Dev Data by Hamming Distance
    utils.validate(devset,
                   utils.HammingDistance,
                   ecoc_matrix,
                   'test.allpairs.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Validation - Evaluate Dev Data by Loss Base Decoding
    utils.validate(devset,
                   utils.lossBaseDecoding,
                   ecoc_matrix,
                   'test.allpairs.loss.pred',
                   classifiers,
                   distanceMetric="LBD")

    # Test - Evaluate Test Data by Hamming Distance
    utils.evaluate(testset,
                   utils.HammingDistance,
                   ecoc_matrix,
                   'test.allpairs.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Test - Evaluate Test Data by Loss Base Decoding
    utils.evaluate(testset,
                   utils.lossBaseDecoding,
                   ecoc_matrix,
                   'test.allpairs.loss.pred',
                   classifiers,
                   distanceMetric="LBD")
Example #7
def main():
    requiredStudentInfo = [
        "id_student", "highest_education", "studied_credits",
        "num_of_prev_attempts", "final_result", "disability"
    ]
    requiredStudentAssessment = ["id_student", "date_submitted", "score"]
    requiredStudentVLE = ["id_student", "sum_of_sum_click"]
    # reading csv Files
    studentInfo = utils.fetchData("studentInfo.csv")
    studentAssessment = utils.fetchData("studentAssessment.csv")
    studentVLE = utils.fetchData("studentVle.csv")

    # creating a new column sum_of_sum_click
    studentVLE["sum_of_sum_click"] = studentVLE.groupby(
        ["id_student"])["sum_click"].transform(sum)

    studentInfo.set_index('id_student')
    studentAssessment.set_index('id_student')
    studentVLE.set_index('id_student')

    studentInfo = studentInfo[requiredStudentInfo]
    studentAssessment = studentAssessment[requiredStudentAssessment]
    studentVLE = studentVLE[requiredStudentVLE]
    studentVLE.drop_duplicates("id_student", inplace=True)

    # There are some "?" values in the studentAssessment csv;
    # replace them with 0 and convert the column to integer
    print("Cleaning \"Score\" Column in studentAssessment.csv")
    studentAssessment = utils.removeUnwantedData(studentAssessment, "score",
                                                 "?", "0")
    studentAssessment["score"] = pd.to_numeric(studentAssessment["score"])

    # combining the three dataFrames
    print("Combining dataFrames...")
    combinedDF = studentInfo.combine_first(studentAssessment)
    combinedDF = combinedDF.combine_first(studentVLE)

    combinedDF.set_index('id_student')
    combinedDFcopy = combinedDF.copy()

    # converting string based data to dummy columns
    print("Encoding string columns...")
    combinedDF = utils.encodingColumns(combinedDF)

    combinedDF["disability"] = pd.to_numeric(combinedDF["disability"])
    combinedDF["final_result"] = pd.to_numeric(combinedDF["final_result"])
    combinedDF["highest_education"] = pd.to_numeric(
        combinedDF["highest_education"])

    # resolving NAN which are created when we combined the dataFrames
    print("Resolving NANs...")
    combinedDF = utils.resolveNANs(combinedDF)

    # Applying KMeans Clustering to create a new column in the dataFrame "procastinate"
    print("Applying KMeans...")
    kmeans = KMeans(init='random', n_clusters=2, tol=1e-04,
                    random_state=0).fit(combinedDF[[
                        "highest_education", "studied_credits",
                        "num_of_prev_attempts", "final_result", "disability",
                        "date_submitted", "score", "sum_of_sum_click"
                    ]])
    # labels = kmeans.fit_predict(combinedDF)
    labels = kmeans.labels_

    # changing 1's and 0's to True and False
    combinedDFcopy["procastinate"] = labels == 1

    # Adding a new column "procastinate"
    combinedDF["procastinate"] = labels

    # Randomizing (shuffle the rows)
    combinedDF = combinedDF.sample(frac=1)

    # Creating New DataFrames inTime and procastinate (for Visualization)
    inTime, procastinate = [
        x for _, x in combinedDF.groupby(combinedDF['procastinate'] == 1)
    ]
    inTime = inTime.head(100)
    procastinate = procastinate.head(100)
    # print(procastinate)

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    ax.scatter(procastinate["date_submitted"],
               procastinate["score"],
               procastinate["final_result"],
               c="r",
               marker="o")
    ax.scatter(inTime["date_submitted"],
               inTime["score"],
               inTime["final_result"],
               c="g",
               marker="o")

    ax.set_xlabel("date_submitted")
    ax.set_ylabel("score")
    ax.set_zlabel("final_result")
    plt.title("Scatter Plot")
    plt.show()

    # Exporting the dataFrame to csv
    combinedDF.to_csv('../Dataset/studentFinal.csv', index=False, header=True)

    # Setting X and y
    y = combinedDF["procastinate"]
    X = combinedDF.drop("procastinate", axis=1)
    X.set_index('id_student', inplace=True)

    # Splitting Data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.50)

    # ANN
    print("Running ANN...")
    ann = MLPClassifier(hidden_layer_sizes=(10, 10, 10), max_iter=1000)
    ann.fit(X_train, y_train.values.ravel())
    predictions = ann.predict(X_test)
    score = ann.score(X_test, y_test)
    loss_values = ann.loss_curve_
    print("Accuracy: ", score * 100)
    plt.title("ANN Loss")
    plt.ylabel("Loss Value")
    plt.plot(loss_values)
    plt.show()

    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])

    print("Classification Report\n",
          classification_report(y_test, predictions))

    # Logistic Regression
    print("Running Logistic Regression...")
    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)
    predictions = logisticRegr.predict(X_test)
    score = logisticRegr.score(X_test, y_test)
    print("Accuracy: ", score * 100)

    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])
    print("Classification Report\n",
          classification_report(y_test, predictions))

    # SVM
    print("Running SVM...")
    svmClassifier = SVC(kernel='linear')
    svmClassifier.fit(X_train.head(1000), y_train[:1000])
    predictions = svmClassifier.predict(X_test)
    score = svmClassifier.score(X_test, y_test)
    print("Accuracy: ", score * 100)

    utils.plot_confusion_matrix(confusion_matrix(y_test, predictions),
                                ["procastinate", "in Time"])
    print("Classification Report\n",
          classification_report(y_test, predictions))
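
utils.encodingColumns and utils.resolveNANs come from the same unshown utils module. A minimal sketch of what they could do, assuming encodingColumns label-encodes every string column in place and resolveNANs fills the NaNs created by combine_first with column means (both bodies are assumptions; the original may use dummy columns or a different fill strategy):

def encodingColumns(df):
    # replace every object/string column with integer category codes
    for col in df.select_dtypes(include="object").columns:
        df[col] = df[col].astype("category").cat.codes
    return df

def resolveNANs(df):
    # fill numeric gaps with the per-column mean
    return df.fillna(df.mean(numeric_only=True))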
Example #8
def main():
    rows = utils.numOfClasses
    columns = random.randint(4, 8)
    ecoc_matrix = create_ecoc_matrix(rows, columns)
    classifiers = []
    trainset = utils.fetchData()
    print("total train set data len: {}".format(str(len(trainset))))
    devset = utils.loadDevData()
    testset = utils.loadTestData()

    lambda_p = 1
    epoch_number = 20
    print(ecoc_matrix)

    print(len(devset), len(testset))

    for j in range(columns):
        positive = []
        negative = []
        for i in range(rows):
            if ecoc_matrix[i][j] == 1:
                positive.append(i)
            elif ecoc_matrix[i][j] == -1:
                negative.append(i)

        print(j, " positive: ", positive, "negative:", negative)
        filtered_data = filter_data(trainset, negative + positive)
        print("filtered data", len(filtered_data))
        # need to change this function to support list
        binary_data = utils.transformToBinaryClasses(filtered_data,
                                                     positiveClass=positive)
        model = utils.SVM(utils.inputDim, utils.eta, lambda_p, epoch_number)
        model.train(binary_data)
        classifiers.append(model)

    # Validation - Evaluate Dev Data by Hamming Distance
    utils.validate(devset,
                   utils.HammingDistance,
                   ecoc_matrix,
                   'test.random.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Validation - Evaluate Dev Data by Loss Base Decoding
    utils.validate(devset,
                   utils.lossBaseDecoding,
                   ecoc_matrix,
                   'test.random.loss.pred',
                   classifiers,
                   distanceMetric="LBD")

    # Test - Evaluate Test Data by Hamming Distance
    utils.evaluate(testset,
                   utils.HammingDistance,
                   ecoc_matrix,
                   'test.random.ham.pred',
                   classifiers,
                   distanceMetric="Hamming")

    # Test - Evaluate Test Data by Loss Base Decoding
    utils.evaluate(testset,
                   utils.lossBaseDecoding,
                   ecoc_matrix,
                   'test.random.loss.pred',
                   classifiers,
                   distanceMetric="LBD")