Exemplo n.º 1
0
def main(path, predict_column):
    """Fit the project's ID3 model and a scikit-learn decision tree on one CSV.

    Every column other than ``predict_column`` and ``'Name'`` is used as a
    feature. The raw column values go to ``ID3.fit``; the scikit-learn tree
    is given the same data after it has been passed through
    ``Switcher.convert_to``.

    Args:
        path: Location of the CSV file, handed to ``pd.read_csv``.
        predict_column: Name of the column holding the target values.

    Returns:
        Always ``0``.
    """
    frame = pd.read_csv(path)
    skipped = (predict_column, 'Name')
    X = [
        frame[name].values for name in frame.columns.values
        if name not in skipped
    ]
    y = frame[predict_column].values

    # NOTE(review): Switcher is defined elsewhere; it presumably maps raw
    # values to numeric codes that scikit-learn can consume - confirm.
    # A single instance converts the features (in column order) and then the
    # target, so the call order below is kept deliberately.
    swtch = Switcher()
    encoded_columns = [np.array(swtch.convert_to(values)) for values in X]
    # The list holds one array per feature column; transposing puts one
    # sample per row (assuming convert_to yields one value per input row).
    sk_X = np.array(encoded_columns).T
    sk_y = np.array(swtch.convert_to(y)).T

    id3_model = ID3()
    entropy = id3_model.fit(X, y)
    print(f'Entropies:\t\t\t{entropy}')

    sk_decisions = DecisionTreeClassifier(random_state=1370)
    sk_decisions.fit(sk_X, sk_y)
    # Scored on the very same data the tree was fitted on.
    training_accuracy = sk_decisions.score(sk_X, sk_y)
    print(f'SkLearn Tree Decision accuracy:\t{training_accuracy}')
    return 0
Exemplo n.º 2
0
def main(path, predict_column):
    """Compare the project's ID3 model with a scikit-learn decision tree.

    Every column other than ``predict_column`` and ``'Name'`` is used as a
    feature. The function first prints what ``ID3.fit`` returns (element
    ``[1]`` is reported as the entropy of the target, element ``[0]`` as one
    information gain per feature), then trains a ``DecisionTreeClassifier``
    on 70% of the converted data and reports its accuracy and predictions on
    the remaining 30%.

    Args:
        path: Location of the CSV file, handed to ``pd.read_csv``.
        predict_column: Name of the column holding the target values.

    Returns:
        Always ``0``.
    """
    df = pd.read_csv(path)
    # Computed once so the printed header always matches the columns in X.
    feature_names = [
        column for column in df.columns.values
        if column != predict_column and column != 'Name'
    ]
    X = [df[column].values for column in feature_names]
    y = df[predict_column].values

    decisions = ID3()
    fitted = decisions.fit(X, y)
    print(f'Entropy of {predict_column}:\t\t\t{fitted[1]}\n')
    print('Information Gains of Xs\n')
    # Join on the separator directly. Joining on ' ' and then replacing every
    # space would also rewrite spaces that are part of a column name.
    separator = '\t | '
    print(separator.join(map(str, feature_names)))
    print(separator.join(f'{gain:.8f}' for gain in fitted[0]))
    print('\n\n')

    # NOTE(review): Switcher is defined elsewhere; it presumably maps raw
    # values to numeric codes that scikit-learn can consume - confirm.
    # A single instance converts the features (in column order) and then the
    # target.
    swtch = Switcher()
    sk_X = np.array([np.array(swtch.convert_to(column)) for column in X]).T
    sk_y = np.array(swtch.convert_to(y)).T

    x_train, x_test, y_train, y_test = train_test_split(sk_X,
                                                        sk_y,
                                                        test_size=0.3)

    sk_decisions = DecisionTreeClassifier(random_state=1370)
    sk_decisions.fit(x_train, y_train)
    # Score on the held-out split: accuracy measured on the rows the tree was
    # fitted on says nothing about how well it generalises.
    print(
        f'SkLearn Tree Decision accuracy:\t{sk_decisions.score(x_test, y_test)}'
    )
    print(f'SkLearn Tree Decision prediction: {sk_decisions.predict(x_test)}')
    return 0