Exemplo n.º 1
0
def numericCol(table, structureTextFile):
    """Return the titles of the columns that the structure file marks NUMERIC.

    :param table: table whose column titles are obtained via getColumnTitles
    :param structureTextFile: path of the space-separated structure file; it
        is read with the column names 'type', 'feature' and 'data'
    :return: list of titles, one per structure row whose 'data' field
        contains 'NUMERIC', in row order
    """
    spec = pd.read_csv(structureTextFile,
                       names=['type', 'feature', 'data'],
                       sep=" ")
    titles = getColumnTitles(table)
    # The title at position k is selected when structure row k is NUMERIC.
    return [titles[row]
            for row in range(len(spec))
            if 'NUMERIC' in spec.loc[row]['data']]
Exemplo n.º 2
0
def allArraysOfFetures(table, classCol):
    """
    Build the (feature, class value) lookup table for ``table``.

    For every column title of ``table`` that is not the class column, and for
    every value returned by ``valuesType(table, classCol)``, the result of
    ``pArrayByFeature(table, feature, value, classCol)`` is stored.

    :param table: data table whose column titles come from getColumnTitles
    :param classCol: title of the class column; a non-string container of
        titles is still accepted and is tested by membership
    :return: dict keyed by the tuple (feature title, class value); each value
        is whatever pArrayByFeature returns for that pair (per the original
        documentation, a list of probabilities of the feature's values)
    """
    # `title not in "class"` is a substring test on a string and would silently
    # drop features titled e.g. "c" or "ass"; wrap a string so that the
    # membership test below compares whole titles instead.
    class_titles = [classCol] if isinstance(classCol, str) else classCol

    thisDict = {}
    for feature in getColumnTitles(table):
        if feature in class_titles:
            continue
        for class_value in valuesType(table, classCol):
            thisDict[feature, class_value] = pArrayByFeature(
                table, feature, class_value, classCol)
    return thisDict
Exemplo n.º 3
0
    def numericCol(table, structureTextFile):
        """
        Collect the column titles flagged as numeric in the structure file.

        :param table: table passed to getColumnTitles to obtain the titles
        :param structureTextFile: path of the space-separated structure file
        :return: list of the titles whose structure row has 'NUMERIC' in its 'data' field
        """
        field_names = ['type', 'feature', 'data']
        layout = pd.read_csv(structureTextFile, sep=" ", names=field_names)
        numeric_titles = []
        titles = getColumnTitles(table)
        for position in range(layout.shape[0]):
            # Skip structure rows that are not marked NUMERIC.
            if 'NUMERIC' not in layout.loc[position]['data']:
                continue
            numeric_titles.append(titles[position])
        return numeric_titles
Exemplo n.º 4
0
def naiveBayes(test, train, structFile):
    """
    Classify every row of ``test`` with a naive Bayes table built from ``train``.

    The table returned by ``allArraysOfFetures(train, 'class')`` is saved to
    'naiveBayes_model.sav' with joblib. Each test row is then labelled 'yes'
    when the product of its 'yes' entries is strictly greater than the product
    of its 'no' entries, and 'no' otherwise (ties go to 'no'). The four
    hit/miss counters are finally passed to ``Eval``.

    :param test: table to classify (accessed through ``.shape`` and ``.iloc``);
        it must have a 'class' column, and its last column title is left out
        of the features
    :param train: table the model is built from
    :param structFile: not used by this function; kept so existing callers
        keep working
    :return: None
    """
    thisDict = allArraysOfFetures(train, 'class')
    rows = test.shape[0]
    match_yes = 0
    match_no = 0
    fail_no = 0
    fail_yes = 0
    # save model to file
    filename = 'naiveBayes_model.sav'
    joblib.dump(thisDict, filename)

    # Every title except the last one, which is expected to be 'class'.
    column = getColumnTitles(test)[:-1]
    for row_idx in range(rows):
        row = test.iloc[row_idx]
        noPar = 1
        yesPar = 1
        for col in column:
            # NOTE(review): valuesType(train, col) is re-evaluated for every
            # row; if it is a pure function it could be computed once per
            # column before the row loop - confirm before hoisting.
            try:
                index = valuesType(train, col).index(row[col])
                new_yes = yesPar * thisDict[(col, 'yes')][index]
                new_no = noPar * thisDict[(col, 'no')][index]
            except Exception:
                # Best effort: a feature that cannot be scored (for example a
                # value that is absent from the values of the training
                # column) is skipped for this row. `Exception` rather than a
                # bare `except` so Ctrl-C / SystemExit still stop the loop.
                continue
            # Commit both products together so a failed lookup never leaves
            # one side updated without the other.
            yesPar, noPar = new_yes, new_no

        actual = row['class']
        if yesPar > noPar:
            if actual == 'yes':
                match_yes += 1
            else:
                fail_yes += 1
        else:
            if actual == 'no':
                match_no += 1
            else:
                fail_no += 1
    Eval(match_yes, match_no, fail_yes, fail_no)