Пример #1
0
def main():
    # Load configuration from file
    config = Configuration(config_file='/home/huma/Downloads/irlib-0.1.1/irlib/classify.conf')
    try:
        config.load_configuration()
        config_data = config.get_configuration()
    except:
        print("Error loading configuration file.")
        print("Classifier aborting.")
        raise

    # config.display_configuration()
    print(config)

    # sys.exit()

    myfolds = config.get_folds()
    correctness = 0

    # Preporcessor: tokenizer, stemmer, etc.
    prep_lower = config_data['lower']
    prep_stem = config_data['stem']
    prep_pos = config_data['pos']
    prep_ngram = config_data['ngram']
    prep = Preprocessor(pattern='\W+', lower=prep_lower, stem=prep_stem, pos=prep_pos, ngram=prep_ngram)

    for myfold in myfolds:
        ev = Evaluation(config=config, fold=myfold)
        if config_data['classifier'] == 'rocchio':
            ml = Rocchio(verbose=VERBOSE, fold=myfold, config=config, ev=ev)
        elif config_data['classifier'] == 'knn':
            ml = KNN(verbose=VERBOSE, fold=myfold, config=config, ev=ev)
        else:
            ml = NaiveBayes(verbose=VERBOSE, fold=myfold, config=config, ev=ev)
        training(config, myfold, ml, prep)
        ml.do_padding()
        ml.calculate_training_data()
        # r.display_idx()
        ml.diagnose()
        testing(config, myfold, ml, ev, prep)

        k = config_data['k']
        results = ev.calculate(review_spam=True, k=k)
        print('Accuracy for fold %d: %s' % (myfold, results))

        correctness += results

    print("Average accuracy for all folds:", correctness / len(myfolds))