# Plot the training loss (format 'bo') and validation loss (format 'b')
# against `epochs`, title the figure with the current dataset name, and save
# it to the "loss" plot path supplied by `results_saver`.
# NOTE(review): the first statement sits at column 0 while the following
# lines are indented -- its indentation looks lost; confirm against the
# enclosing block, which is not visible here.
plt.plot(epochs, loss, 'bo', label='Training loss')
    plt.plot(epochs, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss {}'.format(
        datasets_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.savefig(
        results_saver.get_plot_path(datasets_helper.get_dataset_name(),
                                    "loss"))

    # Clear the current figure before the next plot is drawn.
    plt.clf()
    # Plot training vs. validation accuracy from the training history against
    # `epochs`, title the figure with the current dataset name, and save it
    # to the "acc" plot path supplied by `results_saver`.
    acc = history.history['acc']
    val_acc = history.history['val_acc']
    plt.plot(epochs, acc, 'bo', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation accuracy {}'.format(
        datasets_helper.get_dataset_name()))
    plt.xlabel('Epochs')
    # This figure shows accuracy, so the y-axis must not be labelled 'Loss'.
    plt.ylabel('Accuracy')
    plt.legend()
    plt.savefig(
        results_saver.get_plot_path(datasets_helper.get_dataset_name(), "acc"))
    # Clear the current figure so later plots do not draw over this one.
    plt.clf()

    # Log that processing of the current dataset is complete.
    results_saver.add_log("Finished testing dataset {}".format(
        datasets_helper.get_dataset_name()))

# Dedented: runs once after the enclosing block. Writes the collected
# `results` rows under the name "results", then ends logging.
results_saver.write_2D_list("results", results)
results_saver.end_logging()
Пример #2
0
    doc_term_matrix = [dictionary.doc2bow(doc) for doc in texts]"""
    # Build the project's Lda wrapper with 5 passes and 5 iterations.
    model = Lda(num_of_topics, num_of_important_words, passes=5, iterations=5)
    # The bare string below is disabled code (a direct gensim LdaModel
    # construction); it is evaluated as a no-op expression statement.
    """gensim.models.LdaModel(
    doc_term_matrix,
    num_topics=num_of_topics,
    id2word=dictionary,
    passes=2,
    iterations=2)"""

    # LDA section.
    # `test_LDA` is hard-coded to False, so as written only the `else` branch
    # below ever runs; flip it to True to train and evaluate the standard LDA.
    test_LDA = False
    if test_LDA:
        # Train on the documents, extract and print the topic words, write
        # them out under 'topic_words_lda' (mode 'w+'), then run the model
        # test and the coherence measurement.
        model.train(documents)
        topic_words_lda = extract_important_words(model.get_topics(), True)
        print(topic_words_lda)
        log_writer.write_2D_list('topic_words_lda', topic_words_lda, 'w+')
        test_model(documents, labels, model, log_writer, 'standard_lda')
        #plot_clustering_chart(model,True,documents,log_writer,'lda',dataset_helper.get_dataset_name(),dataset_helper.get_num_of_topics())
        measureCoherence(topic_words_lda, log_writer, model.dictionary,
                         documents, 'lda', dataset_helper.get_dataset_name())
    else:
        # Without training, only attach a dictionary built from the
        # whitespace-split documents to the model.
        model.dictionary = corpora.Dictionary(
            [text.split() for text in documents])
    # Wrap the two weight matrices (`weight_in`, `weight_out`) as topic
    # models and run the same `test_model` evaluation on each.
    neural_lda_in = NeuralTopicMatrix(weight_in, reverse_word_map,
                                      num_of_topics, tokenizer)
    neural_lda_out = NeuralTopicMatrix(weight_out, reverse_word_map,
                                       num_of_topics, tokenizer)
    #neural_lda_combined = NeuralTopicMatrix(combined_weight, reverse_word_map,num_of_topics,tokenizer)
    test_model(documents, labels, neural_lda_in, log_writer, 'neural_lda_in')
    test_model(documents, labels, neural_lda_out, log_writer, 'neural_lda_out')
    #test_model(documents, labels, neural_lda_combined, log_writer,'neural_lda_combined')
Пример #3
0
                                 datasets_helper,
                                 preprocess=preprocess,
                                 preload_dataset=True,
                                 is_predicting=False,
                                 tokenizer_mode=tokenizer_mode)
    # Evaluate the model on `test` and print the outcome.
    result = model.evaluate(x=test)
    print(result)
    # `append` is called on the evaluation result, so it must be a list here
    # -- presumably loss plus metrics; confirm against the model's compile().
    result.append(datasets_helper.get_dataset_name())
    #result.append(model.summary())
    results.append(result)
    results_saver.add_log("Done. Finishing this dataset.")
    # Build a generator over the test file with is_predicting=True; it is
    # handed to finish_dataset below.
    gnr = TrainingTextGenerator(datasets_helper.get_test_file_path(),
                                batch_size,
                                datasets_helper.get_num_of_test_texts(),
                                num_of_words,
                                tokenizer,
                                ";",
                                datasets_helper,
                                preprocess=preprocess,
                                preload_dataset=True,
                                is_predicting=True,
                                tokenizer_mode=tokenizer_mode)

    finish_dataset(model, gnr, datasets_helper, results_saver, history)

    # NOTE(review): the meaning of the second positional argument (True) is
    # not visible here -- check add_log's signature.
    results_saver.add_log(
        "Finished testing dataset {}".format(
            datasets_helper.get_dataset_name()), True)

    # NOTE(review): `results` only grows in the visible code, and this write
    # uses mode 'a+' at the same indentation as the per-dataset statements
    # above; if this runs once per dataset, earlier rows may be written again
    # -- confirm that is intended.
    results_saver.write_2D_list("results", results, 'a+')
# Dedented: ends logging once the enclosing block has finished.
results_saver.end_logging()
Пример #4
0
                datasets_helper.get_dataset_name()))
        # Fetch the training split from the dataset helper and log completion.
        texts_for_train = datasets_helper.get_dataset(DatasetType.TRAIN)
        log_writer.add_log("Preprocessing finished")

        # Same for the test split.
        log_writer.add_log(
            "Starting preprocessing texts of {} for testing".format(
                datasets_helper.get_dataset_name()))
        texts_for_testing = datasets_helper.get_dataset(DatasetType.TEST)
        log_writer.add_log("Preprocessing finished")

        # Fresh statistics list for this dataset; it is passed to do_test,
        # which presumably fills it in place -- confirm in the tester.
        statistics = []
        tester.set_new_preprocess_docs(texts_for_train, texts_for_testing)
        test_params = {
            "dataset_name": datasets_helper.get_dataset_name(),
            'dataset_helper': datasets_helper
        }
        tester.do_test(model, num_of_tests, statistics, models_params[model],
                       test_params, is_stable[model])
        # Follow the statistics with a row holding the dataset name and an
        # empty row, then add everything to the shared output rows.
        statistics.append([datasets_helper.get_dataset_name()])
        statistics.append([])
        output_csv.extend(statistics)

        log_writer.write_2D_list(
            "stats".format(datasets_helper.get_dataset_name(), start_time),
            output_csv, 'a+')
        log_writer.add_log(
            'Done testing {} dataset.'.format(
                datasets_helper.get_dataset_name()), True)

    log_writer.end_logging()
Пример #5
0
# Log writer for this run; its description is the test name immediately
# followed by the regularization value.
log_writer = LogWriter(log_file_desc='{}{}'.format(test_name, regularization))

# Loss curves recorded in the training history; epochs are numbered from 1.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(loss) + 1)

# Draw both curves on the current figure, training first.
for curve, line_format, legend_label in (
        (loss, 'g', 'Training loss'),
        (val_loss, 'b', 'Validation loss'),
):
    plt.plot(epochs, curve, line_format, label=legend_label)

# Title the figure with the dataset name and label the axes.
loss_title = 'Training and validation loss {}'.format(
    dataset_helper.get_dataset_name())
plt.title(loss_title)
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

# Save under the "loss" plot path for this dataset, then clear the figure.
loss_plot_path = log_writer.get_plot_path(dataset_helper.get_dataset_name(),
                                          "loss")
plt.savefig(loss_plot_path)
plt.clf()
"""topic_words_in = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_in]
topic_words_out = [sorted(topic_words,key=lambda x: x[1],reverse=True) for topic_words in topic_words_out]
log_writer = LogWriter(log_file_desc='LDATestsRegularize{}'.format(regularization))
log_writer.write_2D_list('topic_words_in', topic_words_in)
log_writer.write_2D_list('topic_words_out', topic_words_out)"""

# Run get_extremes twice on the input weights (`weight_in`), once with the
# flag True and once with False, under the names 'topic_words_in_max' and
# 'topic_words_in_min'.
# NOTE(review): the boolean presumably selects the largest vs. smallest
# weights, matching the result names -- confirm against get_extremes.
topic_words_in_max = get_extremes(weight_in, num_of_topics,
                                  num_of_important_words, reverse_word_map,
                                  True, 'topic_words_in_max', log_writer,
                                  dataset_helper.get_dataset_name())
topic_words_in_min = get_extremes(weight_in, num_of_topics,
                                  num_of_important_words, reverse_word_map,
                                  False, 'topic_words_in_min', log_writer,
                                  dataset_helper.get_dataset_name())
topic_words_out_max = get_extremes(weight_out, num_of_topics,
                                   num_of_important_words, reverse_word_map,
Пример #6
0
                            data_sets[i][3], start_time, i,
                            model_settings_index, index, j))
                    statistics[len(statistics) - 1].append(accuracy)
                    log_writer.add_log(
                        "Testing LSA model done with {}% accuracy".format(
                            accuracy * 100))
                    log_writer.add_log("\n\n\n")

            statistics.append([])
        statistics_to_merge.append(statistics)
        """for model_settings_index, model_settings in enumerate(hdp_variations):
            for j in range(num_of_test):
                test_checker_hdp = TestChecker(texts_for_testing, data_sets[i][2], log_writer)
                hdp = Hdp(4, 15)
                hdp.train(texts_for_train)
                log_writer.add_log("Starting testing HDP model")
                accuracy = test_checker_hdp.test_model(hdp, "\\results\\hdp\\{}\\{}\\{}\\{}".format(i, model_settings_index, index, j))
                log_writer.add_log("Testing HDP model done with {}% accuracy".format(accuracy * 100))
                log_writer.add_log("\n\n\n")"""

    output_lda_csv = []
    for item in statistics_to_merge:
        for statistic in item:
            output_lda_csv.append(statistic)
    log_writer.write_2D_list(
        "\\results\\results-stats\\stats{}{}".format(data_sets[i][3],
                                                     start_time),
        output_lda_csv)

log_writer.end_logging()