# Example #1
# 0
def init_classifier_models(classifier_data_url, test_data_url, fileName):
    """Predict labels for an unlabeled CSV using a pickled classifier.

    Args:
        classifier_data_url: Path of the pickled classifier, opened in
            binary mode and loaded with ``pickle.load``.
        test_data_url: Location handed to
            ``service.read_csv_data_without_labels`` to obtain the samples.
        fileName: Name forwarded to ``service.save_csv_result`` together
            with ``'output_csvs'`` (presumably the output folder).
    """
    # NOTE(review): unpickling can execute arbitrary code, so only load
    # model files that come from a trusted source.
    with open(classifier_data_url, 'rb') as model_file:
        model = pickle.load(model_file)

    helper = service()
    samples = helper.read_csv_data_without_labels(test_data_url)
    helper.save_csv_result(model.predict(samples), fileName, 'output_csvs')
def parameter_iteration_tunning():
    """Print the validation accuracy of BernoulliNB for several alphas.

    Features and labels are read from ``dataset2/ds2/ds2Train.csv`` for
    fitting and from ``dataset2/ds2/ds2Val.csv`` for scoring. One line per
    candidate ``alpha`` is printed; nothing is returned or saved.
    """
    loader = service()
    x_train, y_train = loader.read_csv_data('dataset2/ds2/ds2Train.csv')
    x_val, y_val = loader.read_csv_data('dataset2/ds2/ds2Val.csv')

    for alpha in (0.01, 0.001, 0.1, 1, 10):
        model = BernoulliNB(alpha=alpha)
        model.fit(x_train, y_train)
        score = accuracy_score(y_val, model.predict(x_val))
        print('BernoulliNB accuracy ' + str(alpha) + ' is', score)
def parameter_iteration_tunning():
    """Sweep ``n_estimators`` for a random forest and print each accuracy.

    Fits on ``dataset1/ds1/ds1Train.csv`` and scores on
    ``dataset1/ds1/ds1Val.csv``. Output goes to stdout only.
    """
    data_service = service()
    train_x, train_y = data_service.read_csv_data('dataset1/ds1/ds1Train.csv')
    val_x, val_y = data_service.read_csv_data('dataset1/ds1/ds1Val.csv')

    tree_counts = (10, 100, 200, 300, 400, 500, 1000)
    for n_trees in tree_counts:
        forest = RandomForestClassifier(n_estimators=n_trees)
        predicted = forest.fit(train_x, train_y).predict(val_x)
        print('RandomForest accuracy ' + str(n_trees) + ' is',
              accuracy_score(val_y, predicted))
def load_naive_bayes_bernoulli():
    """Fit BernoulliNB (alpha=0.001) on dataset 2, report and save results.

    Prints the classification report, the confusion matrix and the accuracy
    for the validation split. The predictions are then passed to
    ``service.save_csv_result`` and the fitted model to
    ``service.save_model_to_pkl``.
    """
    io_service = service()
    x_train, y_train = io_service.read_csv_data('dataset2/ds2/ds2Train.csv')
    x_val, y_val = io_service.read_csv_data('dataset2/ds2/ds2Val.csv')

    model = BernoulliNB(alpha=0.001).fit(x_train, y_train)
    predictions = model.predict(x_val)

    print(classification_report(y_val, predictions))
    print(confusion_matrix(y_val, predictions))
    print("Accuracy Score:", accuracy_score(y_val, predictions))

    io_service.save_csv_result(predictions, 'ds2Val-nb', 'output_csvs')
    io_service.save_model_to_pkl(
        model, 'ds2Classifier-nb', 'dataset2/models/')
def parameter_iteration_tunning():
    """Print decision-tree validation accuracy for ``max_depth`` 1 to 29.

    Fits on ``dataset1/ds1/ds1Train.csv`` and scores on
    ``dataset1/ds1/ds1Val.csv``. Nothing is returned or persisted.
    """
    csv_service = service()
    fit_x, fit_y = csv_service.read_csv_data('dataset1/ds1/ds1Train.csv')
    check_x, check_y = csv_service.read_csv_data('dataset1/ds1/ds1Val.csv')

    for depth in range(1, 30):
        tree = DecisionTreeClassifier(max_depth=depth)
        tree.fit(fit_x, fit_y)
        guesses = tree.predict(check_x)
        label = 'Decision Tree accuracy ' + str(depth) + ' is'
        print(label, accuracy_score(check_y, guesses))
# Example #6
# 0
def load_naive_bayes_multinomial():
    """Fit MultinomialNB (alpha=0.01) on dataset 1, report and save results.

    The classification report, confusion matrix and accuracy for the
    validation split are printed. Predictions and the fitted model are then
    passed to the service's ``save_csv_result`` and ``save_model_to_pkl``.
    """
    svc = service()
    train_x, train_y = svc.read_csv_data('dataset1/ds1/ds1Train.csv')
    val_x, val_y = svc.read_csv_data('dataset1/ds1/ds1Val.csv')

    nb_model = MultinomialNB(alpha=0.01)
    nb_model = nb_model.fit(train_x, train_y)
    val_pred = nb_model.predict(val_x)

    # Both reports take (truth, prediction) and are printed in this order.
    for build_report in (classification_report, confusion_matrix):
        print(build_report(val_y, val_pred))
    print("Accuracy Score:", accuracy_score(val_y, val_pred))

    svc.save_csv_result(
        val_pred, 'nb_multinomial_results', 'dataset1/results/')
    svc.save_model_to_pkl(
        nb_model, 'nb_multinomial_results', 'dataset1/models/')
def load_random_forest():
    """Fit a 10000-tree random forest on dataset 1, report and save results.

    Prints the classification report, confusion matrix and accuracy on the
    validation split. The predictions and the fitted forest are then
    forwarded to the service's save helpers.
    """
    store = service()
    x_fit, y_fit = store.read_csv_data('dataset1/ds1/ds1Train.csv')
    x_check, y_check = store.read_csv_data('dataset1/ds1/ds1Val.csv')

    forest = RandomForestClassifier(n_estimators=10000)
    forest = forest.fit(x_fit, y_fit)
    y_hat = forest.predict(x_check)

    print(classification_report(y_check, y_hat))
    print(confusion_matrix(y_check, y_hat))
    accuracy = accuracy_score(y_check, y_hat)
    print("Accuracy Score:", accuracy)

    store.save_csv_result(y_hat, 'ds1Val-3', 'output_csvs')
    store.save_model_to_pkl(forest, 'ds1Classifier-3', 'dataset1/models/')
def load_decision_tree():
    """Fit an entropy decision tree (max_depth=36) on dataset 1 and save it.

    Validation metrics (classification report, confusion matrix, accuracy)
    are printed before the predictions and the fitted tree are passed to the
    service's ``save_csv_result`` and ``save_model_to_pkl``.
    """
    data_io = service()
    features, labels = data_io.read_csv_data('dataset1/ds1/ds1Train.csv')
    val_features, val_labels = data_io.read_csv_data(
        'dataset1/ds1/ds1Val.csv')

    tree = DecisionTreeClassifier(criterion='entropy', max_depth=36)
    tree = tree.fit(features, labels)
    val_guess = tree.predict(val_features)

    report = classification_report(val_labels, val_guess)
    print(report)
    matrix = confusion_matrix(val_labels, val_guess)
    print(matrix)
    print("Accuracy Score:", accuracy_score(val_labels, val_guess))

    data_io.save_csv_result(val_guess, 'ds1Val-dt', 'output_csvs')
    data_io.save_model_to_pkl(tree, 'ds1Classifier-dt', 'dataset1/models/')
def load_kneighbors():
    """Fit a 1-nearest-neighbour classifier on dataset 1 and save results.

    Prints the classification report, confusion matrix and accuracy for the
    validation split, then forwards the predictions and the fitted model to
    the service's save helpers.
    """
    gateway = service()
    x_tr, y_tr = gateway.read_csv_data('dataset1/ds1/ds1Train.csv')
    x_va, y_va = gateway.read_csv_data('dataset1/ds1/ds1Val.csv')

    knn = KNeighborsClassifier(n_neighbors=1).fit(x_tr, y_tr)
    neighbours_vote = knn.predict(x_va)

    print(classification_report(y_va, neighbours_vote))
    print(confusion_matrix(y_va, neighbours_vote))
    print("Accuracy Score:", accuracy_score(y_va, neighbours_vote))

    result_name = 'k_neighbors_results'
    gateway.save_csv_result(neighbours_vote, result_name, 'dataset1/results/')
    gateway.save_model_to_pkl(knn, result_name, 'dataset1/models/')
def load_naive_bayes_complement():
    """Fit ComplementNB (alpha=1000) on dataset 2, report and save results.

    The classification report, confusion matrix and accuracy for the
    validation split are printed. The predictions and the fitted classifier
    are then passed to the service's ``save_csv_result`` and
    ``save_model_to_pkl``.
    """
    backend = service()
    learn_x, learn_y = backend.read_csv_data('dataset2/ds2/ds2Train.csv')
    holdout_x, holdout_y = backend.read_csv_data('dataset2/ds2/ds2Val.csv')

    cnb = ComplementNB(alpha=1000)
    cnb = cnb.fit(learn_x, learn_y)
    holdout_pred = cnb.predict(holdout_x)

    # Both reports take (truth, prediction) and are printed in this order.
    for summarise in (classification_report, confusion_matrix):
        print(summarise(holdout_y, holdout_pred))
    print("Accuracy Score:", accuracy_score(holdout_y, holdout_pred))

    backend.save_csv_result(
        holdout_pred, 'nb_complement_results', 'dataset2/results/')
    backend.save_model_to_pkl(
        cnb, 'nb_complement_results', 'dataset2/models/')